From patchwork Wed Mar 31 16:53:15 2010
Subject: Some Code for Performance Profiling
From: Jiaqing Du
X-Patchwork-Id: 89986
Message-ID: <6d8082041003310953p33e30819vbb7c2a122bd6becd@mail.gmail.com>
To: kvm@vger.kernel.org
Cc: Nipun sehrawat
Date: Wed, 31 Mar 2010 18:53:15 +0200

=============Guest-wide profiling with domain-switch, for Linux-2.6.32==================

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d27d0a2..b749b5d 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -96,6 +96,7 @@ struct thread_info {
 #define TIF_DS_AREA_MSR		26	/* uses thread_struct.ds_area_msr */
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
+#define TIF_VPMU_CTXSW		29	/* KVM thread tag */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -119,6 +120,7 @@ struct thread_info {
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
 #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_VPMU_CTXSW		(1 << TIF_VPMU_CTXSW)
 
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
@@ -146,8 +148,9 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW \
-	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
-
+	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC| \
+	 _TIF_VPMU_CTXSW)
+
 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
 #define _TIF_WORK_CTXSW_NEXT	(_TIF_WORK_CTXSW|_TIF_DEBUG)
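
For context: __switch_to() only calls __switch_to_xtra() when the outgoing or
incoming task has a bit from these masks set, roughly as in the snippet below
(paraphrased from __switch_to() in arch/x86/kernel/process_64.c of 2.6.32, not
part of the patch). Adding TIF_VPMU_CTXSW to _TIF_WORK_CTXSW is what makes the
PMU save/restore hook in the process.c hunks that follow run whenever a tagged
KVM thread is switched in or out.

	/* Paraphrased caller; the hunk above extends _TIF_WORK_CTXSW so
	 * TIF_VPMU_CTXSW also takes this path. */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);
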
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2..d5269d8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -178,6 +178,53 @@ int set_tsc_mode(unsigned int val)
 	return 0;
 }
 
+static const u32 vmx_pmu_msr_index[] = {
+	MSR_P6_EVNTSEL0, MSR_P6_EVNTSEL1, MSR_P6_PERFCTR0, MSR_P6_PERFCTR1,
+};
+#define NR_VMX_PMU_MSR ARRAY_SIZE(vmx_pmu_msr_index)
+static u64 vpmu_msr_list[NR_VMX_PMU_MSR];
+
+static void vpmu_load_msrs(u64 *msr_list)
+{
+	u64 *p = msr_list;
+	int i;
+
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		wrmsrl(vmx_pmu_msr_index[i], *p);
+		p++;
+	}
+}
+
+static void vpmu_save_msrs(u64 *msr_list)
+{
+	u64 *p = msr_list;
+	int i;
+
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		rdmsrl(vmx_pmu_msr_index[i], *p);
+		p++;
+	}
+}
+
+#define P6_EVENTSEL0_ENABLE	(1 << 22)
+static void enable_perf(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val |= P6_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static void disable_perf(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val &= ~P6_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss)
 {
@@ -186,6 +233,21 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	prev = &prev_p->thread;
 	next = &next_p->thread;
 
+	if (test_tsk_thread_flag(prev_p, TIF_VPMU_CTXSW) &&
+	    test_tsk_thread_flag(next_p, TIF_VPMU_CTXSW)) {
+		/* do nothing, still in KVM context */
+	} else {
+		if (test_tsk_thread_flag(prev_p, TIF_VPMU_CTXSW)) {
+			disable_perf();
+			vpmu_save_msrs(vpmu_msr_list);
+		}
+
+		if (test_tsk_thread_flag(next_p, TIF_VPMU_CTXSW)) {
+			vpmu_load_msrs(vpmu_msr_list);
+			enable_perf();
+		}
+	}
+
 	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
 	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
 		ds_switch_to(prev_p, next_p);
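
The hunks above save and restore the four P6 PMU MSRs around context switches
and toggle bit 22 of EVNTSEL0, the counter enable bit. For reference, a rough,
hypothetical sketch of how a guest-side profiler would program counter 0 for
overflow sampling follows; it is not part of the patch, the event code is only
an example, and the helper name is made up.

/*
 * Hypothetical guest-side sketch: program counter 0 of the architectural
 * (P6-style) PMU to raise a PMI on overflow.  Bit 22 is the same enable
 * bit that enable_perf()/disable_perf() toggle above.
 */
#include <linux/types.h>
#include <asm/msr.h>		/* rdmsrl()/wrmsrl() */
#include <asm/msr-index.h>	/* MSR_P6_EVNTSEL0, MSR_P6_PERFCTR0 */

#define EVTSEL_EVENT(e)	((u64)(e) & 0xff)	  /* bits 0-7: event select */
#define EVTSEL_UMASK(u)	(((u64)(u) & 0xff) << 8)  /* bits 8-15: unit mask */
#define EVTSEL_USR	(1ULL << 16)		  /* count in user mode */
#define EVTSEL_OS	(1ULL << 17)		  /* count in kernel mode */
#define EVTSEL_INT	(1ULL << 20)		  /* PMI on counter overflow */
#define EVTSEL_EN	(1ULL << 22)		  /* enable (P6_EVENTSEL0_ENABLE) */

static void guest_start_sampling(u32 sample_period)
{
	/* Pre-load the counter so it overflows after sample_period events;
	 * the counter sign-extends from bit 31, so keep the value to 32 bits. */
	wrmsrl(MSR_P6_PERFCTR0, (u64)(-(s32)sample_period) & 0xffffffff);

	/* Event 0x3c, umask 0x00 is unhalted core cycles on the
	 * architectural PMU. */
	wrmsrl(MSR_P6_EVNTSEL0, EVTSEL_EVENT(0x3c) | EVTSEL_UMASK(0x00) |
				EVTSEL_USR | EVTSEL_OS | EVTSEL_INT | EVTSEL_EN);
}
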
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed53b42..4f4ff86 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
@@ -127,6 +128,7 @@ static u64 construct_eptp(unsigned long root_hpa);
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
+static DEFINE_PER_CPU(struct kvm_vcpu *, cur_exit_vcpu);
 
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
@@ -3603,6 +3605,7 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int cpu = raw_smp_processor_id();
 
 	if (enable_ept && is_paging(vcpu)) {
 		vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
@@ -3639,6 +3642,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->arch.switch_db_regs)
 		set_debugreg(vcpu->arch.dr6, 6);
 
+	/* record the exited vcpu */
+	per_cpu(cur_exit_vcpu, cpu) = vcpu;
+
 	asm(
 		/* Store host registers */
 		"push %%"R"dx; push %%"R"bp;"
@@ -3985,6 +3991,43 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.gb_page_enable = vmx_gb_page_enable,
 };
 
+static void guest_set_apic(void *info)
+{
+	unsigned int v;
+
+	v = apic_read(APIC_LVTERR);
+	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	apic_write(APIC_LVTERR, v);
+}
+
+static int vmx_vcpu_nmi_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	int cpu = raw_smp_processor_id();
+	struct kvm_vcpu *vcpu = per_cpu(cur_exit_vcpu, cpu);
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_NMI:
+	case DIE_NMI_IPI:
+		guest_set_apic(NULL);
+		vcpu->cntr_overflow = 1;
+		vcpu->nmi_nr++;
+		ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+static struct notifier_block vmx_vcpu_nb = {
+	.notifier_call = vmx_vcpu_nmi_notify,
+	.next = NULL,
+	.priority = 3
+};
+
 static int __init vmx_init(void)
 {
 	int r;
@@ -4036,6 +4079,17 @@
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 
+	vmx_disable_intercept_for_msr(MSR_P6_PERFCTR0, false);
+	vmx_disable_intercept_for_msr(MSR_P6_PERFCTR1, false);
+	vmx_disable_intercept_for_msr(MSR_P6_EVNTSEL0, false);
+	vmx_disable_intercept_for_msr(MSR_P6_EVNTSEL1, false);
+
+	if (register_die_notifier(&vmx_vcpu_nb)) {
+		printk(KERN_ALERT "[hw_vpmu]: Register NMI handler failed..\n");
+	} else {
+		printk(KERN_ALERT "[hw_vpmu]: Register NMI handler succeeded..\n");
+	}
+
 	if (enable_ept) {
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
@@ -4071,6 +4125,9 @@ static void __exit vmx_exit(void)
 	free_page((unsigned long)vmx_io_bitmap_b);
 	free_page((unsigned long)vmx_io_bitmap_a);
 
+	unregister_die_notifier(&vmx_vcpu_nb);
+	printk(KERN_ALERT "[hw_vpmu]: Remove NMI handler module..\n");
+
 	kvm_exit();
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d26..1abedb4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3615,6 +3615,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
+	if (vcpu->cntr_overflow) {
+		vcpu->arch.nmi_pending = 1;
+		vcpu->cntr_overflow = 0;
+	}
+
 	inject_pending_event(vcpu, kvm_run);
 
 	/* enable NMI/IRQ window open exits if needed */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b7bbb5d..96d63d1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -99,6 +99,9 @@ struct kvm_vcpu {
 	gpa_t mmio_phys_addr;
 #endif
 
+	int cntr_overflow;
+	int nmi_nr;
+
 	struct kvm_vcpu_arch arch;
 };
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index cf24c20..b0942c1 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -225,6 +225,9 @@ extern int flush_work(struct work_struct *work);
 
 extern int cancel_work_sync(struct work_struct *work);
 
+extern struct task_struct *thread_of_workqueue(struct workqueue_struct *wq,
+					       int cpu);
+
 /*
  * Kill off a pending schedule_delayed_work().  Note that the work callback
  * function may still be running on return from cancel_delayed_work(), unless
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 67e526b..5eb9503 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -150,6 +150,15 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
 	spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
+struct task_struct *thread_of_workqueue(struct workqueue_struct *wq,
+					int cpu)
+{
+	struct cpu_workqueue_struct *cwq = wq_per_cpu(wq, cpu);
+
+	return cwq->thread;
+}
+EXPORT_SYMBOL_GPL(thread_of_workqueue);
+
 /**
  * queue_work - queue work on a workqueue
  * @wq: workqueue to use
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index bb4ebd8..33b5da8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -318,10 +318,18 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 static int __init irqfd_module_init(void)
 {
+	int cpu = raw_smp_processor_id();
+	struct task_struct *thread;
+
 	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
 	if (!irqfd_cleanup_wq)
 		return -ENOMEM;
 
+	thread = thread_of_workqueue(irqfd_cleanup_wq, cpu);
+	set_tsk_thread_flag(thread, TIF_VPMU_CTXSW);
+	printk(KERN_ALERT "[hw_vpmu]: monitored irqfd thread id = %d\n",
+	       (int)thread->pid);
+
 	return 0;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7495ce3..355bff5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1809,6 +1809,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	kvm->bsp_vcpu = vcpu;
 #endif
 	mutex_unlock(&kvm->lock);
+
+	set_tsk_thread_flag(current, TIF_VPMU_CTXSW);
+	printk(KERN_ALERT "[hw_vpmu]: monitored vcpu thread id = %d\n",
+	       (int)current->pid);
+
 	return r;
 
 vcpu_destroy:
@@ -2360,6 +2365,10 @@ static int kvm_dev_ioctl_create_vm(void)
 	if (fd < 0)
 		kvm_put_kvm(kvm);
 
+	set_tsk_thread_flag(current, TIF_VPMU_CTXSW);
+	printk(KERN_ALERT "[hw_vpmu]: monitored main thread id = %d\n",
+	       (int)current->pid);
+
 	return fd;
 }

=============Guest-wide profiling with cpu-switch, for Linux-2.6.32==================
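
In this variant the PMU MSRs are switched by the processor itself: the
VM-entry MSR-load area restores the guest values on every VM entry, and the
VM-exit MSR-store and MSR-load areas save the guest values and restore the
host values on every VM exit. Each entry in these areas is 16 bytes wide: a
32-bit MSR index, 32 reserved bits, and the 64-bit MSR value, which is what
the u32 pointer stepped by "p += 2" walks in the hunks below. A struct-based
view of the same layout, for illustration only (the struct and helper names
are not part of the patch):

/* One entry of a VM-entry/VM-exit MSR load/store area (16 bytes each,
 * as defined by the VMX architecture).  Illustrative only. */
struct vmx_msr_area_entry {
	u32 index;	/* MSR number, e.g. MSR_P6_PERFCTR0 */
	u32 reserved;	/* must be zero */
	u64 value;	/* MSR contents loaded/stored by the CPU */
};

/* Equivalent of the patch's save_host_msrs()/area initialization,
 * rewritten with the struct view (hypothetical helper). */
static void fill_msr_area(struct vmx_msr_area_entry *area)
{
	int i;

	for (i = 0; i < NR_VMX_PMU_MSR; i++) {
		area[i].index = vmx_pmu_msr_index[i];
		area[i].reserved = 0;
		rdmsrl(vmx_pmu_msr_index[i], area[i].value);
	}
}
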
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed53b42..970b5ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
@@ -114,6 +115,9 @@ struct vcpu_vmx {
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
 	u32 exit_reason;
+
+	unsigned long *msr_host_load_store;
+	unsigned long *msr_guest_load_store;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -127,12 +131,18 @@ static u64 construct_eptp(unsigned long root_hpa);
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
+static DEFINE_PER_CPU(struct kvm_vcpu *, cur_exit_vcpu);
 
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 
+static const u32 vmx_pmu_msr_index[] = {
+	MSR_P6_EVNTSEL0, MSR_P6_EVNTSEL1, MSR_P6_PERFCTR0, MSR_P6_PERFCTR1,
+};
+#define NR_VMX_PMU_MSR ARRAY_SIZE(vmx_pmu_msr_index)
+
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -2272,6 +2282,14 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
 	vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
 
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, NR_VMX_PMU_MSR);
+	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_guest_load_store));
+
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, NR_VMX_PMU_MSR);
+	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_guest_load_store));
+	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, NR_VMX_PMU_MSR);
+	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_host_load_store));
+
 	if (cpu_has_vmx_msr_bitmap())
 		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
@@ -2340,9 +2358,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
 	vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
 	rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
@@ -3600,9 +3615,34 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
 #define Q "l"
 #endif
 
+static void guest_set_apic(void *info)
+{
+	unsigned int v;
+
+	v = apic_read(APIC_LVTERR);
+	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	apic_write(APIC_LVTERR, v);
+}
+
+static void save_host_msrs(struct vcpu_vmx *vmx)
+{
+	u32 *p;
+	int i;
+
+	p = (u32 *)vmx->msr_host_load_store;
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		*p = vmx_pmu_msr_index[i];
+		p += 2;
+		rdmsrl(vmx_pmu_msr_index[i], *((u64 *)p));
+		p += 2;
+	}
+}
+
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int cpu = raw_smp_processor_id();
 
 	if (enable_ept && is_paging(vcpu)) {
 		vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
@@ -3639,6 +3679,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->arch.switch_db_regs)
 		set_debugreg(vcpu->arch.dr6, 6);
 
+	/* record the exited vcpu */
+	per_cpu(cur_exit_vcpu, cpu) = vcpu;
+
+	/* The guest counters are reloaded by the hardware later. */
+	save_host_msrs(vmx);
+
 	asm(
 		/* Store host registers */
 		"push %%"R"dx; push %%"R"bp;"
@@ -3750,6 +3796,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vmx->launched = 1;
 
 	vmx_complete_interrupts(vmx);
+
+	/* always clear LVTPC bit */
+	guest_set_apic(NULL);
+
 }
 
 #undef R
@@ -3766,6 +3816,59 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
 	}
 }
 
+static int vmx_create_vpmu_msrs(struct kvm_vcpu *vcpu)
+{
+	int i, r = 0;
+	u32 *p;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	vmx->msr_host_load_store = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx->msr_host_load_store) {
+		r = -ENOMEM;
+		return r;
+	}
+
+	vmx->msr_guest_load_store = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx->msr_guest_load_store) {
+		r = -ENOMEM;
+		goto free_msr_host;
+	}
+
+	memset(vmx->msr_host_load_store, 0x00, PAGE_SIZE);
+	memset(vmx->msr_guest_load_store, 0x00, PAGE_SIZE);
+
+	/*
+	 * Initialize the load/store memory areas, using the current host
+	 * MSR contents as initial values.
+	 */
+	p = (u32 *)vmx->msr_host_load_store;
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		*p = vmx_pmu_msr_index[i];
+		p += 2;
+		rdmsrl(vmx_pmu_msr_index[i], *((u64 *)p));
+		p += 2;
+	}
+
+	p = (u32 *)vmx->msr_guest_load_store;
+	for (i = 0; i < NR_VMX_PMU_MSR; ++i) {
+		*p = vmx_pmu_msr_index[i];
+		p += 2;
+		rdmsrl(vmx_pmu_msr_index[i], *((u64 *)p));
+		p += 2;
+	}
+
+	return r;
+
+free_msr_host:
+	free_page((unsigned long)vmx->msr_host_load_store);
+	return r;
+}
+
+static void vmx_free_vpmu_msrs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	free_page((unsigned long)vmx->msr_host_load_store);
+	free_page((unsigned long)vmx->msr_guest_load_store);
+}
+
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3777,6 +3880,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	vmx_free_vmcs(vcpu);
 	kfree(vmx->host_msrs);
 	kfree(vmx->guest_msrs);
+	vmx_free_vpmu_msrs(vcpu);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 }
@@ -3812,6 +3916,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmcs_clear(vmx->vmcs);
 
+	if (vmx_create_vpmu_msrs(&vmx->vcpu))
+		goto free_vmcs;
+
 	cpu = get_cpu();
 	vmx_vcpu_load(&vmx->vcpu, cpu);
 	err = vmx_vcpu_setup(vmx);
@@ -3985,6 +4092,33 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.gb_page_enable = vmx_gb_page_enable,
 };
 
+static int vmx_vcpu_nmi_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	int cpu = raw_smp_processor_id();
+	struct kvm_vcpu *vcpu = per_cpu(cur_exit_vcpu, cpu);
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_NMI:
+	case DIE_NMI_IPI:
+		guest_set_apic(NULL);
+		vcpu->cntr_overflow = 1;
+		vcpu->nmi_nr++;
+		ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+static struct notifier_block vmx_vcpu_nb = {
+	.notifier_call = vmx_vcpu_nmi_notify,
+	.next = NULL,
+	.priority = 3
+};
+
 static int __init vmx_init(void)
 {
 	int r;
@@ -4036,6 +4170,17 @@
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 
+	vmx_disable_intercept_for_msr(MSR_P6_PERFCTR0, false);
+	vmx_disable_intercept_for_msr(MSR_P6_PERFCTR1, false);
+	vmx_disable_intercept_for_msr(MSR_P6_EVNTSEL0, false);
+	vmx_disable_intercept_for_msr(MSR_P6_EVNTSEL1, false);
+
+	if (register_die_notifier(&vmx_vcpu_nb)) {
+		printk(KERN_ALERT "[hw_vpmu]: Register NMI handler failed..\n");
+	} else {
+		printk(KERN_ALERT "[hw_vpmu]: Register NMI handler succeeded..\n");
+	}
+
 	if (enable_ept) {
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
@@ -4071,6 +4216,9 @@ static void __exit vmx_exit(void)
 	free_page((unsigned long)vmx_io_bitmap_b);
 	free_page((unsigned long)vmx_io_bitmap_a);
 
+	unregister_die_notifier(&vmx_vcpu_nb);
+	printk(KERN_ALERT "[hw_vpmu]: Remove NMI handler module..\n");
+
 	kvm_exit();
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d26..1abedb4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3615,6 +3615,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
+	if (vcpu->cntr_overflow) {
+		vcpu->arch.nmi_pending = 1;
+		vcpu->cntr_overflow = 0;
+	}
+
 	inject_pending_event(vcpu, kvm_run);
 
 	/* enable NMI/IRQ window open exits if needed */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b7bbb5d..96d63d1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -99,6 +99,9 @@ struct kvm_vcpu {
 	gpa_t mmio_phys_addr;
 #endif
 
+	int cntr_overflow;
+	int nmi_nr;
+
 	struct kvm_vcpu_arch arch;
 };
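
Finally, a hypothetical guest-side smoke test (not part of either patch): with
the EVNTSEL/PERFCTR MSRs passed through, a counter programmed as in the
earlier sketch should visibly advance across a busy loop.

/* Hypothetical guest kernel snippet; names are illustrative only. */
#include <linux/kernel.h>
#include <asm/msr.h>

static void vpmu_smoke_test(void)
{
	u64 before, after;
	volatile int i;

	rdmsrl(MSR_P6_PERFCTR0, before);
	for (i = 0; i < 1000000; i++)
		;
	rdmsrl(MSR_P6_PERFCTR0, after);
	pr_info("PERFCTR0 advanced by %llu\n",
		(unsigned long long)(after - before));
}
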