From patchwork Wed Mar 31 16:53:15 2010
Subject: Some Code for Performance Profiling
From: Jiaqing Du
X-Patchwork-Id: 89986
Message-ID: <6d8082041003310953p33e30819vbb7c2a122bd6becd@mail.gmail.com>
To: kvm@vger.kernel.org
Cc: Nipun sehrawat
Date: Wed, 31 Mar 2010 18:53:15 +0200

=============Guest-wide profiling with domain-switch, for Linux-2.6.32==================

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d27d0a2..b749b5d 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -96,6 +96,7 @@ struct thread_info {
 #define TIF_DS_AREA_MSR		26	/* uses thread_struct.ds_area_msr */
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
+#define TIF_VPMU_CTXSW		29	/* KVM thread tag */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -119,6 +120,7 @@ struct thread_info {
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
 #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_VPMU_CTXSW		(1 << TIF_VPMU_CTXSW)
 
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
@@ -146,8 +148,9 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW \
-	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
-
+	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC| \
+	 _TIF_VPMU_CTXSW)
+
 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
 #define _TIF_WORK_CTXSW_NEXT	(_TIF_WORK_CTXSW|_TIF_DEBUG)
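
For context: __switch_to() only calls __switch_to_xtra() when the outgoing or
incoming task has a bit from these masks set, roughly as in the snippet below
(paraphrased from __switch_to() in arch/x86/kernel/process_64.c of 2.6.32, not
part of the patch). Adding TIF_VPMU_CTXSW to _TIF_WORK_CTXSW is what makes the
PMU save/restore hook in the process.c hunks that follow run whenever a tagged
KVM thread is switched in or out.

	/* Paraphrased caller; the hunk above extends _TIF_WORK_CTXSW so
	 * TIF_VPMU_CTXSW also takes this path. */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);
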
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2..d5269d8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -178,6 +178,53 @@ int set_tsc_mode(unsigned int val)
 	return 0;
 }
 
+static const u32 vmx_pmu_msr_index[] = {
+	MSR_P6_EVNTSEL0, MSR_P6_EVNTSEL1, MSR_P6_PERFCTR0, MSR_P6_PERFCTR1,
+};
+#define NR_VMX_PMU_MSR ARRAY_SIZE(vmx_pmu_msr_index)
+static u64 vpmu_msr_list[NR_VMX_PMU_MSR];
+
+static void vpmu_load_msrs(u64 *msr_list)
+{
+	u64 *p = msr_list;
+	int i;
+
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		wrmsrl(vmx_pmu_msr_index[i], *p);
+		p++;
+	}
+}
+
+static void vpmu_save_msrs(u64 *msr_list)
+{
+	u64 *p = msr_list;
+	int i;
+
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		rdmsrl(vmx_pmu_msr_index[i], *p);
+		p++;
+	}
+}
+
+#define P6_EVENTSEL0_ENABLE	(1 << 22)
+static void enable_perf(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val |= P6_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static void disable_perf(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val &= ~P6_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss)
 {
@@ -186,6 +233,21 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	prev = &prev_p->thread;
 	next = &next_p->thread;
 
+	if (test_tsk_thread_flag(prev_p, TIF_VPMU_CTXSW) &&
+	    test_tsk_thread_flag(next_p, TIF_VPMU_CTXSW)) {
+		/* do nothing, still in KVM context */
+	} else {
+		if (test_tsk_thread_flag(prev_p, TIF_VPMU_CTXSW)) {
+			disable_perf();
+			vpmu_save_msrs(vpmu_msr_list);
+		}
+
+		if (test_tsk_thread_flag(next_p, TIF_VPMU_CTXSW)) {
+			vpmu_load_msrs(vpmu_msr_list);
+			enable_perf();
+		}
+	}
+
 	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
 	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
 		ds_switch_to(prev_p, next_p);
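
The hunks above save and restore the four P6 PMU MSRs around context switches
and toggle bit 22 of EVNTSEL0, the counter enable bit. For reference, a rough,
hypothetical sketch of how a guest-side profiler would program counter 0 for
overflow sampling follows; it is not part of the patch, the event code is only
an example, and the helper name is made up.

/*
 * Hypothetical guest-side sketch: program counter 0 of the architectural
 * (P6-style) PMU to raise a PMI on overflow.  Bit 22 is the same enable
 * bit that enable_perf()/disable_perf() toggle above.
 */
#include <linux/types.h>
#include <asm/msr.h>		/* rdmsrl()/wrmsrl() */
#include <asm/msr-index.h>	/* MSR_P6_EVNTSEL0, MSR_P6_PERFCTR0 */

#define EVTSEL_EVENT(e)	((u64)(e) & 0xff)	  /* bits 0-7: event select */
#define EVTSEL_UMASK(u)	(((u64)(u) & 0xff) << 8)  /* bits 8-15: unit mask */
#define EVTSEL_USR	(1ULL << 16)		  /* count in user mode */
#define EVTSEL_OS	(1ULL << 17)		  /* count in kernel mode */
#define EVTSEL_INT	(1ULL << 20)		  /* PMI on counter overflow */
#define EVTSEL_EN	(1ULL << 22)		  /* enable (P6_EVENTSEL0_ENABLE) */

static void guest_start_sampling(u32 sample_period)
{
	/* Pre-load the counter so it overflows after sample_period events;
	 * the counter sign-extends from bit 31, so keep the value to 32 bits. */
	wrmsrl(MSR_P6_PERFCTR0, (u64)(-(s32)sample_period) & 0xffffffff);

	/* Event 0x3c, umask 0x00 is unhalted core cycles on the
	 * architectural PMU. */
	wrmsrl(MSR_P6_EVNTSEL0, EVTSEL_EVENT(0x3c) | EVTSEL_UMASK(0x00) |
				EVTSEL_USR | EVTSEL_OS | EVTSEL_INT | EVTSEL_EN);
}
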
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed53b42..4f4ff86 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
@@ -127,6 +128,7 @@ static u64 construct_eptp(unsigned long root_hpa);
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
+static DEFINE_PER_CPU(struct kvm_vcpu *, cur_exit_vcpu);
 
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
@@ -3603,6 +3605,7 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int cpu = raw_smp_processor_id();
 
 	if (enable_ept && is_paging(vcpu)) {
 		vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
@@ -3639,6 +3642,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->arch.switch_db_regs)
 		set_debugreg(vcpu->arch.dr6, 6);
 
+	/* record the exited vcpu */
+	per_cpu(cur_exit_vcpu, cpu) = vcpu;
+
 	asm(
 		/* Store host registers */
 		"push %%"R"dx; push %%"R"bp;"
@@ -3985,6 +3991,43 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.gb_page_enable = vmx_gb_page_enable,
 };
 
+static void guest_set_apic(void *info)
+{
+	unsigned int v;
+
+	v = apic_read(APIC_LVTERR);
+	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	apic_write(APIC_LVTERR, v);
+}
+
+static int vmx_vcpu_nmi_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	int cpu = raw_smp_processor_id();
+	struct kvm_vcpu *vcpu = per_cpu(cur_exit_vcpu, cpu);
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_NMI:
+	case DIE_NMI_IPI:
+		guest_set_apic(NULL);
+		vcpu->cntr_overflow = 1;
+		vcpu->nmi_nr++;
+		ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+static struct notifier_block vmx_vcpu_nb = {
+	.notifier_call = vmx_vcpu_nmi_notify,
+	.next = NULL,
+	.priority = 3
+};
+
 static int __init vmx_init(void)
 {
 	int r;
@@ -4036,6 +4079,17 @@
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 
+	vmx_disable_intercept_for_msr(MSR_P6_PERFCTR0, false);
+	vmx_disable_intercept_for_msr(MSR_P6_PERFCTR1, false);
+	vmx_disable_intercept_for_msr(MSR_P6_EVNTSEL0, false);
+	vmx_disable_intercept_for_msr(MSR_P6_EVNTSEL1, false);
+
+	if (register_die_notifier(&vmx_vcpu_nb)) {
+		printk(KERN_ALERT "[hw_vpmu]: Register NMI handler failed..\n");
+	} else {
+		printk(KERN_ALERT "[hw_vpmu]: Register NMI handler succeeded..\n");
+	}
+
 	if (enable_ept) {
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
@@ -4071,6 +4125,9 @@ static void __exit vmx_exit(void)
 	free_page((unsigned long)vmx_io_bitmap_b);
 	free_page((unsigned long)vmx_io_bitmap_a);
 
+	unregister_die_notifier(&vmx_vcpu_nb);
+	printk(KERN_ALERT "[hw_vpmu]: Remove NMI handler module..\n");
+
 	kvm_exit();
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d26..1abedb4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3615,6 +3615,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
+	if (vcpu->cntr_overflow) {
+		vcpu->arch.nmi_pending = 1;
+		vcpu->cntr_overflow = 0;
+	}
+
 	inject_pending_event(vcpu, kvm_run);
 
 	/* enable NMI/IRQ window open exits if needed */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b7bbb5d..96d63d1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -99,6 +99,9 @@ struct kvm_vcpu {
 	gpa_t mmio_phys_addr;
 #endif
 
+	int cntr_overflow;
+	int nmi_nr;
+
 	struct kvm_vcpu_arch arch;
 };
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index cf24c20..b0942c1 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -225,6 +225,9 @@ extern int flush_work(struct work_struct *work);
 
 extern int cancel_work_sync(struct work_struct *work);
 
+extern struct task_struct *thread_of_workqueue(struct workqueue_struct *wq,
+					       int cpu);
+
 /*
  * Kill off a pending schedule_delayed_work().  Note that the work callback
  * function may still be running on return from cancel_delayed_work(), unless
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 67e526b..5eb9503 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -150,6 +150,15 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
 	spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
+struct task_struct *thread_of_workqueue(struct workqueue_struct *wq,
+					int cpu)
+{
+	struct cpu_workqueue_struct *cwq = wq_per_cpu(wq, cpu);
+
+	return cwq->thread;
+}
+EXPORT_SYMBOL_GPL(thread_of_workqueue);
+
 /**
  * queue_work - queue work on a workqueue
  * @wq: workqueue to use
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index bb4ebd8..33b5da8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -318,10 +318,18 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 static int __init irqfd_module_init(void)
 {
+	int cpu = raw_smp_processor_id();
+	struct task_struct *thread;
+
 	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
 	if (!irqfd_cleanup_wq)
 		return -ENOMEM;
 
+	thread = thread_of_workqueue(irqfd_cleanup_wq, cpu);
+	set_tsk_thread_flag(thread, TIF_VPMU_CTXSW);
+	printk(KERN_ALERT "[hw_vpmu]: monitored irqfd thread id = %d\n",
+	       (int)thread->pid);
+
 	return 0;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7495ce3..355bff5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1809,6 +1809,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	kvm->bsp_vcpu = vcpu;
 #endif
 	mutex_unlock(&kvm->lock);
+
+	set_tsk_thread_flag(current, TIF_VPMU_CTXSW);
+	printk(KERN_ALERT "[hw_vpmu]: monitored vcpu thread id = %d\n",
+	       (int)current->pid);
+
 	return r;
 
 vcpu_destroy:
@@ -2360,6 +2365,10 @@ static int kvm_dev_ioctl_create_vm(void)
 	if (fd < 0)
 		kvm_put_kvm(kvm);
 
+	set_tsk_thread_flag(current, TIF_VPMU_CTXSW);
+	printk(KERN_ALERT "[hw_vpmu]: monitored main thread id = %d\n",
+	       (int)current->pid);
+
 	return fd;
 }

=============Guest-wide profiling with cpu-switch, for Linux-2.6.32==================
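
In this variant the PMU MSRs are switched by the processor itself: the
VM-entry MSR-load area restores the guest values on every VM entry, and the
VM-exit MSR-store and MSR-load areas save the guest values and restore the
host values on every VM exit. Each entry in these areas is 16 bytes wide: a
32-bit MSR index, 32 reserved bits, and the 64-bit MSR value, which is what
the u32 pointer stepped by "p += 2" walks in the hunks below. A struct-based
view of the same layout, for illustration only (the struct and helper names
are not part of the patch):

/* One entry of a VM-entry/VM-exit MSR load/store area (16 bytes each,
 * as defined by the VMX architecture).  Illustrative only. */
struct vmx_msr_area_entry {
	u32 index;	/* MSR number, e.g. MSR_P6_PERFCTR0 */
	u32 reserved;	/* must be zero */
	u64 value;	/* MSR contents loaded/stored by the CPU */
};

/* Equivalent of the patch's save_host_msrs()/area initialization,
 * rewritten with the struct view (hypothetical helper). */
static void fill_msr_area(struct vmx_msr_area_entry *area)
{
	int i;

	for (i = 0; i < NR_VMX_PMU_MSR; i++) {
		area[i].index = vmx_pmu_msr_index[i];
		area[i].reserved = 0;
		rdmsrl(vmx_pmu_msr_index[i], area[i].value);
	}
}
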
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed53b42..970b5ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
@@ -114,6 +115,9 @@ struct vcpu_vmx {
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
 	u32 exit_reason;
+
+	unsigned long *msr_host_load_store;
+	unsigned long *msr_guest_load_store;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -127,12 +131,18 @@ static u64 construct_eptp(unsigned long root_hpa);
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
+static DEFINE_PER_CPU(struct kvm_vcpu *, cur_exit_vcpu);
 
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 
+static const u32 vmx_pmu_msr_index[] = {
+	MSR_P6_EVNTSEL0, MSR_P6_EVNTSEL1, MSR_P6_PERFCTR0, MSR_P6_PERFCTR1,
+};
+#define NR_VMX_PMU_MSR ARRAY_SIZE(vmx_pmu_msr_index)
+
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -2272,6 +2282,14 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
 	vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
 
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, NR_VMX_PMU_MSR);
+	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_guest_load_store));
+
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, NR_VMX_PMU_MSR);
+	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_guest_load_store));
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, NR_VMX_PMU_MSR);
+	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_host_load_store));
+
 	if (cpu_has_vmx_msr_bitmap())
 		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
@@ -2340,9 +2358,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
 	vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
 	rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
@@ -3600,9 +3615,34 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
 #define Q "l"
 #endif
 
+static void guest_set_apic(void *info)
+{
+	unsigned int v;
+
+	v = apic_read(APIC_LVTERR);
+	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	apic_write(APIC_LVTERR, v);
+}
+
+static void save_host_msrs(struct vcpu_vmx *vmx)
+{
+	u32 *p;
+	int i;
+
+	p = (u32 *)vmx->msr_host_load_store;
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		*p = vmx_pmu_msr_index[i];
+		p += 2;
+		rdmsrl(vmx_pmu_msr_index[i], *((u64 *)p));
+		p += 2;
+	}
+}
+
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int cpu = raw_smp_processor_id();
 
 	if (enable_ept && is_paging(vcpu)) {
 		vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
@@ -3639,6 +3679,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->arch.switch_db_regs)
 		set_debugreg(vcpu->arch.dr6, 6);
 
+	/* record the exited vcpu */
+	per_cpu(cur_exit_vcpu, cpu) = vcpu;
+
+	/* The guest counters are reloaded by the hardware later. */
+	save_host_msrs(vmx);
+
 	asm(
 		/* Store host registers */
 		"push %%"R"dx; push %%"R"bp;"
@@ -3750,6 +3796,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vmx->launched = 1;
 
 	vmx_complete_interrupts(vmx);
+
+	/* always clear LVTPC bit */
+	guest_set_apic(NULL);
+
 }
 
 #undef R
@@ -3766,6 +3816,59 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
 	}
 }
 
+static int vmx_create_vpmu_msrs(struct kvm_vcpu *vcpu)
+{
+	int i, r = 0;
+	u32 *p;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	vmx->msr_host_load_store = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx->msr_host_load_store) {
+		r = -ENOMEM;
+		return r;
+	}
+
+	vmx->msr_guest_load_store = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx->msr_guest_load_store) {
+		r = -ENOMEM;
+		goto free_msr_host;
+	}
+
+	memset(vmx->msr_host_load_store, 0x00, PAGE_SIZE);
+	memset(vmx->msr_guest_load_store, 0x00, PAGE_SIZE);
+
+	/*
+	 * Initialize the load/store memory areas, using the current host
+	 * MSR contents as initial values.
+	 */
+	p = (u32 *)vmx->msr_host_load_store;
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		*p = vmx_pmu_msr_index[i];
+		p += 2;
+		rdmsrl(vmx_pmu_msr_index[i], *((u64 *)p));
+		p += 2;
+	}
+
+	p = (u32 *)vmx->msr_guest_load_store;
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		*p = vmx_pmu_msr_index[i];
+		p += 2;
+		rdmsrl(vmx_pmu_msr_index[i], *((u64 *)p));
+		p += 2;
+	}
+
+	return r;
+
+free_msr_host:
+	free_page((unsigned long)vmx->msr_host_load_store);
+	return r;
+}
+
+static void vmx_free_vpmu_msrs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	free_page((unsigned long)vmx->msr_host_load_store);
+	free_page((unsigned long)vmx->msr_guest_load_store);
+}
+
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3777,6 +3880,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	vmx_free_vmcs(vcpu);
 	kfree(vmx->host_msrs);
 	kfree(vmx->guest_msrs);
+	vmx_free_vpmu_msrs(vcpu);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 }
@@ -3812,6 +3916,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmcs_clear(vmx->vmcs);
 
+	if (vmx_create_vpmu_msrs(&vmx->vcpu))
+		goto free_vmcs;
+
 	cpu = get_cpu();
 	vmx_vcpu_load(&vmx->vcpu, cpu);
 	err = vmx_vcpu_setup(vmx);
@@ -3985,6 +4092,33 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.gb_page_enable = vmx_gb_page_enable,
 };
 
+static int vmx_vcpu_nmi_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	int cpu = raw_smp_processor_id();
+	struct kvm_vcpu *vcpu = per_cpu(cur_exit_vcpu, cpu);
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_NMI:
+	case DIE_NMI_IPI:
+		guest_set_apic(NULL);
+		vcpu->cntr_overflow = 1;
+		vcpu->nmi_nr++;
+		ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+static struct notifier_block vmx_vcpu_nb = {
+	.notifier_call = vmx_vcpu_nmi_notify,
+	.next = NULL,
+	.priority = 3
+};
+
 static int __init vmx_init(void)
 {
 	int r;
@@ -4036,6 +4170,17 @@
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 
+	vmx_disable_intercept_for_msr(MSR_P6_PERFCTR0, false);
+	vmx_disable_intercept_for_msr(MSR_P6_PERFCTR1, false);
+	vmx_disable_intercept_for_msr(MSR_P6_EVNTSEL0, false);
+	vmx_disable_intercept_for_msr(MSR_P6_EVNTSEL1, false);
+
+	if (register_die_notifier(&vmx_vcpu_nb)) {
+		printk(KERN_ALERT "[hw_vpmu]: Register NMI handler failed..\n");
+	} else {
+		printk(KERN_ALERT "[hw_vpmu]: Register NMI handler succeeded..\n");
+	}
+
 	if (enable_ept) {
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
@@ -4071,6 +4216,9 @@ static void __exit vmx_exit(void)
 	free_page((unsigned long)vmx_io_bitmap_b);
 	free_page((unsigned long)vmx_io_bitmap_a);
 
+	unregister_die_notifier(&vmx_vcpu_nb);
+	printk(KERN_ALERT "[hw_vpmu]: Remove NMI handler module..\n");
+
 	kvm_exit();
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d26..1abedb4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3615,6 +3615,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
+	if (vcpu->cntr_overflow) {
+		vcpu->arch.nmi_pending = 1;
+		vcpu->cntr_overflow = 0;
+	}
+
 	inject_pending_event(vcpu, kvm_run);
 
 	/* enable NMI/IRQ window open exits if needed */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b7bbb5d..96d63d1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -99,6 +99,9 @@ struct kvm_vcpu {
 	gpa_t mmio_phys_addr;
 #endif
 
+	int cntr_overflow;
+	int nmi_nr;
+
 	struct kvm_vcpu_arch arch;
 };
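
Finally, a hypothetical guest-side smoke test (not part of either patch): with
the EVNTSEL/PERFCTR MSRs passed through, a counter programmed as in the
earlier sketch should visibly advance across a busy loop.

/* Hypothetical guest kernel snippet; names are illustrative only. */
#include <linux/kernel.h>
#include <asm/msr.h>

static void vpmu_smoke_test(void)
{
	u64 before, after;
	volatile int i;

	rdmsrl(MSR_P6_PERFCTR0, before);
	for (i = 0; i < 1000000; i++)
		;
	rdmsrl(MSR_P6_PERFCTR0, after);
	pr_info("PERFCTR0 advanced by %llu\n",
		(unsigned long long)(after - before));
}
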