@@ -412,6 +412,10 @@ struct kvm_arch {
s64 kvmclock_offset;
struct kvm_xen_hvm_config xen_hvm_config;
+
+ /* fields used by HYPER-V emulation */
+ u64 hv_guest_os_id;
+ u64 hv_hypercall;
};
struct kvm_vm_stat {
new file mode 100644
@@ -0,0 +1,187 @@
+#ifndef _ASM_X86_KVM_HYPERV_H
+#define _ASM_X86_KVM_HYPERV_H
+
+#include <linux/types.h>
+
+/*
+ * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
+ * is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
+ */
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
+#define HYPERV_CPUID_INTERFACE 0x40000001
+#define HYPERV_CPUID_VERSION 0x40000002
+#define HYPERV_CPUID_FEATURES 0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+
+/*
+ * Feature identification. EAX indicates which features are available
+ * to the partition based upon the current partition privileges.
+ */
+
+/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
+#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0)
+/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
+#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
+/*
+ * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
+ * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
+ */
+#define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2)
+/*
+ * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
+ * HV_X64_MSR_STIMER3_COUNT) available
+ */
+#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3)
+/*
+ * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
+ * are available
+ */
+#define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4)
+/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
+#define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5)
+/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
+#define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6)
+/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
+#define HV_X64_MSR_RESET_AVAILABLE (1 << 7)
+ /*
+ * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
+ * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
+ * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
+ */
+#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8)
+
+/*
+ * Feature identification: EBX indicates which flags were specified at
+ * partition creation. The format is the same as the partition creation
+ * flag structure defined in section Partition Creation Flags.
+ */
+#define HV_X64_CREATE_PARTITIONS (1 << 0)
+#define HV_X64_ACCESS_PARTITION_ID (1 << 1)
+#define HV_X64_ACCESS_MEMORY_POOL (1 << 2)
+#define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3)
+#define HV_X64_POST_MESSAGES (1 << 4)
+#define HV_X64_SIGNAL_EVENTS (1 << 5)
+#define HV_X64_CREATE_PORT (1 << 6)
+#define HV_X64_CONNECT_PORT (1 << 7)
+#define HV_X64_ACCESS_STATS (1 << 8)
+#define HV_X64_DEBUGGING (1 << 11)
+#define HV_X64_CPU_POWER_MANAGEMENT (1 << 12)
+#define HV_X64_CONFIGURE_PROFILER (1 << 13)
+
+/*
+ * Feature identification. EDX indicates which miscellaneous features
+ * are available to the partition.
+ */
+/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
+#define HV_X64_MWAIT_AVAILABLE (1 << 0)
+/* Guest debugging support is available */
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1)
+/* Performance Monitor support is available*/
+#define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2)
+/* Support for physical CPU dynamic partitioning events is available*/
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3)
+/*
+ * Support for passing hypercall input parameter block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4)
+/* Support for a virtual guest idle state is available */
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5)
+
+/*
+ * Implementation recommendations. Indicates which behaviors the hypervisor
+ * recommends the OS implement for optimal performance.
+ */
+ /*
+ * Recommend using hypercall for address space switches rather
+ * than MOV to CR3 instruction
+ */
+#define HV_X64_MWAIT_RECOMMENDED (1 << 0)
+/* Recommend using hypercall for local TLB flushes rather
+ * than INVLPG or MOV to CR3 instructions */
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1)
+/*
+ * Recommend using hypercall for remote TLB flushes rather
+ * than inter-processor interrupts
+ */
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2)
+/*
+ * Recommend using MSRs for accessing APIC registers
+ * EOI, ICR and TPR rather than their memory-mapped counterparts
+ */
+#define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3)
+/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
+#define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4)
+/*
+ * Recommend using relaxed timing for this partition. If used,
+ * the VM should disable any watchdog timeouts that rely on the
+ * timely delivery of external interrupts
+ */
+#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5)
+
+/* MSR used to identify the guest OS. */
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+
+/* MSR used to setup pages used to communicate with the hypervisor. */
+#define HV_X64_MSR_HYPERCALL 0x40000001
+
+/* MSR used to provide vcpu index */
+#define HV_X64_MSR_VP_INDEX 0x40000002
+
+/* Define the virtual APIC registers */
+#define HV_X64_MSR_EOI 0x40000070
+#define HV_X64_MSR_ICR 0x40000071
+#define HV_X64_MSR_TPR 0x40000072
+#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073
+
+/* Define synthetic interrupt controller model specific registers. */
+#define HV_X64_MSR_SCONTROL 0x40000080
+#define HV_X64_MSR_SVERSION 0x40000081
+#define HV_X64_MSR_SIEFP 0x40000082
+#define HV_X64_MSR_SIMP 0x40000083
+#define HV_X64_MSR_EOM 0x40000084
+#define HV_X64_MSR_SINT0 0x40000090
+#define HV_X64_MSR_SINT1 0x40000091
+#define HV_X64_MSR_SINT2 0x40000092
+#define HV_X64_MSR_SINT3 0x40000093
+#define HV_X64_MSR_SINT4 0x40000094
+#define HV_X64_MSR_SINT5 0x40000095
+#define HV_X64_MSR_SINT6 0x40000096
+#define HV_X64_MSR_SINT7 0x40000097
+#define HV_X64_MSR_SINT8 0x40000098
+#define HV_X64_MSR_SINT9 0x40000099
+#define HV_X64_MSR_SINT10 0x4000009A
+#define HV_X64_MSR_SINT11 0x4000009B
+#define HV_X64_MSR_SINT12 0x4000009C
+#define HV_X64_MSR_SINT13 0x4000009D
+#define HV_X64_MSR_SINT14 0x4000009E
+#define HV_X64_MSR_SINT15 0x4000009F
+
+
+#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
+#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
+
+/* Declare the various hypercall operations. */
+#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008
+
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+#define HV_PROCESSOR_POWER_STATE_C0 0
+#define HV_PROCESSOR_POWER_STATE_C1 1
+#define HV_PROCESSOR_POWER_STATE_C2 2
+#define HV_PROCESSOR_POWER_STATE_C3 3
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS 0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
+#define HV_STATUS_INVALID_ALIGNMENT 4
+
+#endif
+
@@ -2,6 +2,7 @@
#define _ASM_X86_KVM_PARA_H
#include <linux/types.h>
+#include <asm/kvm_hyperv.h>
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
* should be used to determine that a VM is running under KVM.
@@ -56,6 +56,38 @@ TRACE_EVENT(kvm_hypercall,
);
/*
+ * Tracepoint for hypercall.
+ */
+TRACE_EVENT(kvm_hv_hypercall,
+ TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx,
+ __u64 ingpa, __u64 outgpa),
+ TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
+
+ TP_STRUCT__entry(
+ __field( __u16, code )
+ __field( bool, fast )
+ __field( __u16, rep_cnt )
+ __field( __u16, rep_idx )
+ __field( __u64, ingpa )
+ __field( __u64, outgpa )
+ ),
+
+ TP_fast_assign(
+ __entry->code = code;
+ __entry->fast = fast;
+ __entry->rep_cnt = rep_cnt;
+ __entry->rep_idx = rep_idx;
+ __entry->ingpa = ingpa;
+ __entry->outgpa = outgpa;
+ ),
+
+ TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
+ __entry->code, __entry->fast ? "fast":"slow",
+ __entry->rep_cnt, __entry->rep_idx, __entry->ingpa,
+ __entry->outgpa)
+);
+
+/*
* Tracepoint for PIO.
*/
TRACE_EVENT(kvm_pio,
@@ -628,7 +628,8 @@ static u32 msrs_to_save[] = {
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
- MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+ MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+ HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL
};
static unsigned num_msrs_to_save;
@@ -1003,6 +1004,69 @@ out:
return r;
}
+static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
+{
+ return !!(kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE);
+}
+
+static bool kvm_hv_msr_partition_wide(u32 msr)
+{
+ bool r = false;
+ switch (msr) {
+ case HV_X64_MSR_GUEST_OS_ID:
+ case HV_X64_MSR_HYPERCALL:
+ r = true;
+ break;
+ }
+
+ return r;
+}
+
+static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ switch (msr) {
+ case HV_X64_MSR_GUEST_OS_ID:
+ kvm->arch.hv_guest_os_id = data;
+ /* setting guest os id to zero disables hypercall page */
+ if (!kvm->arch.hv_guest_os_id)
+ kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
+ break;
+ case HV_X64_MSR_HYPERCALL: {
+ u64 gfn;
+ unsigned long addr;
+ /* if guest os id is not set hypercall should remain disabled */
+ if (!kvm->arch.hv_guest_os_id && data)
+ break;
+ kvm->arch.hv_hypercall = data;
+ if(!kvm_hv_hypercall_enabled(kvm))
+ break;
+ gfn = kvm->arch.hv_hypercall >>
+ HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
+ addr = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(addr))
+ return 1;
+ kvm_x86_ops->patch_hypercall(vcpu, (unsigned char*)addr);
+ ((unsigned char*)addr)[3] = 0xc3; /* ret */
+ break;
+ }
+ default:
+ pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
+ "data 0x%llx\n", msr, data);
+ return 1;
+ }
+ return 0;
+}
+
+static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x data 0x%llx\n",
+ msr, data);
+
+ return 1;
+}
+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
switch (msr) {
@@ -1117,6 +1181,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
"0x%x data 0x%llx\n", msr, data);
break;
+ case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+ if (kvm_hv_msr_partition_wide(msr)) {
+ int r;
+ mutex_lock(&vcpu->kvm->lock);
+ r = set_msr_hyperv_pw(vcpu, msr, data);
+ mutex_unlock(&vcpu->kvm->lock);
+ return r;
+ } else
+ return set_msr_hyperv(vcpu, msr, data);
+ break;
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
@@ -1216,6 +1290,48 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return 0;
}
+static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+ u64 data = 0;
+ struct kvm *kvm = vcpu->kvm;
+
+ switch (msr) {
+ case HV_X64_MSR_GUEST_OS_ID:
+ data = kvm->arch.hv_guest_os_id;
+ break;
+ case HV_X64_MSR_HYPERCALL:
+ data = kvm->arch.hv_hypercall;
+ break;
+ default:
+ pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ return 1;
+ }
+
+ *pdata = data;
+ return 0;
+}
+
+static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+ u64 data = 0;
+
+ switch (msr) {
+ case HV_X64_MSR_VP_INDEX: {
+ int r;
+ struct kvm_vcpu *v;
+ kvm_for_each_vcpu(r, v, vcpu->kvm)
+ if (v == vcpu)
+ data = r;
+ break;
+ }
+ default:
+ pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ return 1;
+ }
+ *pdata = data;
+ return 0;
+}
+
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
@@ -1282,6 +1398,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
return get_msr_mce(vcpu, msr, pdata);
+ case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+ if (kvm_hv_msr_partition_wide(msr)) {
+ int r;
+ mutex_lock(&vcpu->kvm->lock);
+ r = get_msr_hyperv_pw(vcpu, msr, pdata);
+ mutex_unlock(&vcpu->kvm->lock);
+ return r;
+ } else
+ return get_msr_hyperv(vcpu, msr, pdata);
+ break;
default:
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -1397,6 +1523,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_XEN_HVM:
case KVM_CAP_ADJUST_CLOCK:
case KVM_CAP_VCPU_EVENTS:
+ case KVM_CAP_HYPERV:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -3617,11 +3744,66 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
return a0 | ((gpa_t)a1 << 32);
}
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+{
+ u64 param, ingpa, outgpa, ret;
+ uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+ bool fast, longmode;
+ int cs_db, cs_l;
+
+ /*
+ * hypercall generates UD from non zero cpl and real mode
+ * per HYPER-V spec
+ */
+ if (kvm_x86_ops->get_cpl(vcpu) != 0 || !(vcpu->arch.cr0 & X86_CR0_PE)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 0;
+ }
+
+ kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ longmode = is_long_mode(vcpu) && cs_l == 1;
+
+ if (longmode) {
+ param = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
+ outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+ } else {
+ param = (kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
+ (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffff);
+ ingpa = (kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
+ (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffff);
+ outgpa = (kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
+ (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffff);
+ }
+
+ code = param & 0xffff;
+ fast = (param >> 16) & 0x1;
+ rep_cnt = (param >> 32) & 0xfff;
+ rep_idx = (param >> 48) & 0xfff;
+
+ trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+
+ res = HV_STATUS_INVALID_HYPERCALL_CODE;
+
+ ret = res | (((u64)rep_done & 0xfff) << 32);
+ if (longmode) {
+ kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+ } else {
+ kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
+ }
+
+ return 1;
+}
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
int r = 1;
+ if(kvm_hv_hypercall_enabled(vcpu->kvm))
+ return kvm_hv_hypercall(vcpu);
+
nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -497,6 +497,7 @@ struct kvm_ioeventfd {
#endif
#define KVM_CAP_S390_PSW 42
#define KVM_CAP_PPC_SEGSTATE 43
+#define KVM_CAP_HYPERV 44
#ifdef KVM_CAP_IRQ_ROUTING