@@ -92,9 +92,142 @@ struct shared_msr_entry {
u64 mask;
};
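+/*
+ * Software image of a vmcs, used to save and restore the L1 vmcs state
+ * when switching to a level 2 guest and back. Fields are grouped by
+ * width, matching the 16-bit, 64-bit, 32-bit and natural-width vmcs
+ * field encodings.
+ */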
+struct __attribute__ ((__packed__)) shadow_vmcs {
+ u16 virtual_processor_id;
+ u16 guest_es_selector;
+ u16 guest_cs_selector;
+ u16 guest_ss_selector;
+ u16 guest_ds_selector;
+ u16 guest_fs_selector;
+ u16 guest_gs_selector;
+ u16 guest_ldtr_selector;
+ u16 guest_tr_selector;
+ u16 host_es_selector;
+ u16 host_cs_selector;
+ u16 host_ss_selector;
+ u16 host_ds_selector;
+ u16 host_fs_selector;
+ u16 host_gs_selector;
+ u16 host_tr_selector;
+ u64 io_bitmap_a;
+ u64 io_bitmap_b;
+ u64 msr_bitmap;
+ u64 vm_exit_msr_store_addr;
+ u64 vm_exit_msr_load_addr;
+ u64 vm_entry_msr_load_addr;
+ u64 tsc_offset;
+ u64 virtual_apic_page_addr;
+ u64 apic_access_addr;
+ u64 ept_pointer;
+ u64 guest_physical_address;
+ u64 vmcs_link_pointer;
+ u64 guest_ia32_debugctl;
+ u64 guest_ia32_pat;
+ u64 guest_pdptr0;
+ u64 guest_pdptr1;
+ u64 guest_pdptr2;
+ u64 guest_pdptr3;
+ u64 host_ia32_pat;
+ u32 pin_based_vm_exec_control;
+ u32 cpu_based_vm_exec_control;
+ u32 exception_bitmap;
+ u32 page_fault_error_code_mask;
+ u32 page_fault_error_code_match;
+ u32 cr3_target_count;
+ u32 vm_exit_controls;
+ u32 vm_exit_msr_store_count;
+ u32 vm_exit_msr_load_count;
+ u32 vm_entry_controls;
+ u32 vm_entry_msr_load_count;
+ u32 vm_entry_intr_info_field;
+ u32 vm_entry_exception_error_code;
+ u32 vm_entry_instruction_len;
+ u32 tpr_threshold;
+ u32 secondary_vm_exec_control;
+ u32 vm_instruction_error;
+ u32 vm_exit_reason;
+ u32 vm_exit_intr_info;
+ u32 vm_exit_intr_error_code;
+ u32 idt_vectoring_info_field;
+ u32 idt_vectoring_error_code;
+ u32 vm_exit_instruction_len;
+ u32 vmx_instruction_info;
+ u32 guest_es_limit;
+ u32 guest_cs_limit;
+ u32 guest_ss_limit;
+ u32 guest_ds_limit;
+ u32 guest_fs_limit;
+ u32 guest_gs_limit;
+ u32 guest_ldtr_limit;
+ u32 guest_tr_limit;
+ u32 guest_gdtr_limit;
+ u32 guest_idtr_limit;
+ u32 guest_es_ar_bytes;
+ u32 guest_cs_ar_bytes;
+ u32 guest_ss_ar_bytes;
+ u32 guest_ds_ar_bytes;
+ u32 guest_fs_ar_bytes;
+ u32 guest_gs_ar_bytes;
+ u32 guest_ldtr_ar_bytes;
+ u32 guest_tr_ar_bytes;
+ u32 guest_interruptibility_info;
+ u32 guest_activity_state;
+ u32 guest_sysenter_cs;
+ u32 host_ia32_sysenter_cs;
+ unsigned long cr0_guest_host_mask;
+ unsigned long cr4_guest_host_mask;
+ unsigned long cr0_read_shadow;
+ unsigned long cr4_read_shadow;
+ unsigned long cr3_target_value0;
+ unsigned long cr3_target_value1;
+ unsigned long cr3_target_value2;
+ unsigned long cr3_target_value3;
+ unsigned long exit_qualification;
+ unsigned long guest_linear_address;
+ unsigned long guest_cr0;
+ unsigned long guest_cr3;
+ unsigned long guest_cr4;
+ unsigned long guest_es_base;
+ unsigned long guest_cs_base;
+ unsigned long guest_ss_base;
+ unsigned long guest_ds_base;
+ unsigned long guest_fs_base;
+ unsigned long guest_gs_base;
+ unsigned long guest_ldtr_base;
+ unsigned long guest_tr_base;
+ unsigned long guest_gdtr_base;
+ unsigned long guest_idtr_base;
+ unsigned long guest_dr7;
+ unsigned long guest_rsp;
+ unsigned long guest_rip;
+ unsigned long guest_rflags;
+ unsigned long guest_pending_dbg_exceptions;
+ unsigned long guest_sysenter_esp;
+ unsigned long guest_sysenter_eip;
+ unsigned long host_cr0;
+ unsigned long host_cr3;
+ unsigned long host_cr4;
+ unsigned long host_fs_base;
+ unsigned long host_gs_base;
+ unsigned long host_tr_base;
+ unsigned long host_gdtr_base;
+ unsigned long host_idtr_base;
+ unsigned long host_ia32_sysenter_esp;
+ unsigned long host_ia32_sysenter_eip;
+ unsigned long host_rsp;
+ unsigned long host_rip;
+};
+
struct __attribute__ ((__packed__)) level_state {
/* Has the level1 guest done vmclear? */
bool vmclear;
+
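+ /*
+ * I/O and MSR bitmap pointers saved from the hardware vmcs that is
+ * live when this vmcs is loaded (see create_l2_state()).
+ */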
+ u64 io_bitmap_a;
+ u64 io_bitmap_b;
+ u64 msr_bitmap;
+
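+ /* Set on vmptrld; presumably guards VMLAUNCH vs. VMRESUME on entry. */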
+ bool first_launch;
};
/*
@@ -122,6 +255,8 @@ struct nested_vmx {
gpa_t current_vmptr;
/* Level 1 state for switching to level 2 and back */
struct level_state *l1_state;
+ /* Level 1 shadow vmcs for switching to level 2 and back */
+ struct shadow_vmcs *l1_shadow_vmcs;
/* list of vmcs for each l2 guest created by l1 */
struct list_head l2_vmcs_list;
/* l2 page corresponding to the current vmcs set by l1 */
@@ -187,10 +322,7 @@ static struct page *nested_get_page(struct kvm_vcpu *vcpu,
{
struct page *vmcs_page = NULL;
- down_read(&current->mm->mmap_sem);
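+ /* gfn_to_page() handles the required mm locking internally. */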
vmcs_page = gfn_to_page(vcpu->kvm, vmcs_addr >> PAGE_SHIFT);
- up_read(&current->mm->mmap_sem);
-
if (is_error_page(vmcs_page)) {
printk(KERN_ERR "%s error allocating page 0x%llx\n",
__func__, vmcs_addr);
@@ -832,13 +964,14 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
u8 error;
-
per_cpu(current_vmcs, cpu) = vmx->vmcs;
+
asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
: "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc");
+
if (error)
- printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
+ printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
vmx->vmcs, phys_addr);
}
@@ -1240,6 +1373,7 @@ static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
return 0;
}
+
static int read_guest_vmcs_gpa(struct kvm_vcpu *vcpu, gva_t gva, u64 *gentry)
{
int r = 0;
@@ -1430,6 +1564,18 @@ static struct level_state *create_state(void)
return state;
}
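+
+/*
+ * Return the hardware vmcs backing the vmcs address the L1 guest has
+ * currently loaded (nested.current_vmptr), or NULL if none has been
+ * allocated for it yet.
+ */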
+static struct vmcs *nested_get_current_vmcs(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct nested_vmcs_list *list_item;
+
+ list_for_each_entry(list_item, &vmx->nested.l2_vmcs_list, list)
+ if (list_item->vmcs_addr == vmx->nested.current_vmptr)
+ return list_item->l2_vmcs;
+
+ return NULL;
+}
+
static int create_l1_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1441,10 +1587,75 @@ static int create_l1_state(struct kvm_vcpu *vcpu)
} else
return 0;
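+ /* struct shadow_vmcs fits comfortably within a single page. */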
+ vmx->nested.l1_shadow_vmcs = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vmx->nested.l1_shadow_vmcs) {
+ printk(KERN_ERR "%s could not allocate memory for l1 shadow vmcs\n",
+ __func__);
+ kfree(vmx->nested.l1_state);
+ return -ENOMEM;
+ }
+
INIT_LIST_HEAD(&(vmx->nested.l2_vmcs_list));
return 0;
}
+static struct vmcs *alloc_vmcs(void);
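+
+/*
+ * Find or create the hardware vmcs for the vmcs address the L1 guest
+ * just loaded, and snapshot the I/O and MSR bitmap pointers of the
+ * vmcs that is active at vmptrld time.
+ */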
+int create_l2_state(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmcs *l2_vmcs;
+
+ if (!nested_map_current(vcpu)) {
+ printk(KERN_ERR "%s error mapping level 2 page", __func__);
+ return -ENOMEM;
+ }
+
+ l2_vmcs = nested_get_current_vmcs(vcpu);
+ if (!l2_vmcs) {
+ struct nested_vmcs_list *new_l2_guest =
+ kmalloc(sizeof(struct nested_vmcs_list), GFP_KERNEL);
+
+ if (!new_l2_guest) {
+ printk(KERN_ERR "%s error could not allocate memory for a new l2 guest list item",
+ __func__);
+ nested_unmap_current(vcpu);
+ return -ENOMEM;
+ }
+
+ l2_vmcs = alloc_vmcs();
+
+ if (!l2_vmcs) {
+ printk(KERN_ERR "%s error could not allocate memory for l2_vmcs",
+ __func__);
+ kfree(new_l2_guest);
+ nested_unmap_current(vcpu);
+ return -ENOMEM;
+ }
+
+ new_l2_guest->vmcs_addr = vmx->nested.current_vmptr;
+ new_l2_guest->l2_vmcs = l2_vmcs;
+ list_add(&(new_l2_guest->list), &(vmx->nested.l2_vmcs_list));
+ }
+
+ if (cpu_has_vmx_msr_bitmap())
+ vmx->nested.current_l2_page->l2_state.msr_bitmap =
+ vmcs_read64(MSR_BITMAP);
+ else
+ vmx->nested.current_l2_page->l2_state.msr_bitmap = 0;
+
+ vmx->nested.current_l2_page->l2_state.io_bitmap_a =
+ vmcs_read64(IO_BITMAP_A);
+ vmx->nested.current_l2_page->l2_state.io_bitmap_b =
+ vmcs_read64(IO_BITMAP_B);
+
+ vmx->nested.current_l2_page->l2_state.first_launch = true;
+
+ nested_unmap_current(vcpu);
+
+ return 0;
+}
+
/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
* tricks.
*/
@@ -1623,6 +1834,7 @@ static void nested_free_current_vmcs(struct kvm_vcpu *vcpu)
if (list_item->vmcs_addr == vmx->nested.current_vmptr) {
free_vmcs(list_item->l2_vmcs);
list_del(&(list_item->list));
+ kfree(list_item);
return;
}
}
@@ -1637,11 +1849,14 @@ static void free_l1_state(struct kvm_vcpu *vcpu)
kfree(vmx->nested.l1_state);
vmx->nested.l1_state = NULL;
+ kfree(vmx->nested.l1_shadow_vmcs);
+ vmx->nested.l1_shadow_vmcs = NULL;
list_for_each_entry_safe(list_item, n, &vmx->nested.l2_vmcs_list,
list) {
free_vmcs(list_item->l2_vmcs);
list_del(&(list_item->list));
+ kfree(list_item);
}
}
@@ -3604,6 +3819,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
{
struct kvm_segment cs;
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int r = 0;
if (!nested) {
pr_debug("%s: nested vmx not enabled\n", __func__);
@@ -3633,8 +3849,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
- if (create_l1_state(vcpu)) {
- printk(KERN_ERR "%s create_l1_state failed\n", __func__);
+ r = create_l1_state(vcpu);
+ if (r) {
+ printk(KERN_ERR "%s create_l1_state failed: %d\n", __func__, r);
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -3645,6 +3862,63 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
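+
+/*
+ * Emulate VMPTRLD for the L1 guest: read the vmcs pointer operand from
+ * guest memory and make it the current nested vmcs, creating the
+ * backing l2 state on first use.
+ */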
+static int handle_vmptrld(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u64 guest_vmcs_addr;
+ gva_t vmcs_gva;
+ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ int r = 0;
+
+ if (!nested_vmx_check_permission(vcpu))
+ return 1;
+
+ vmcs_gva = get_vmx_mem_address(vcpu, exit_qualification,
+ vmx_instruction_info);
+
+ if (read_guest_vmcs_gpa(vcpu, vmcs_gva, &guest_vmcs_addr))
+ return 1;
+
+ if (vmx->nested.current_vmptr != guest_vmcs_addr) {
+ vmx->nested.current_vmptr = guest_vmcs_addr;
+ r = create_l2_state(vcpu);
+ if (r) {
+ printk(KERN_ERR "%s create_l2_state failed: %d\n",
+ __func__, r);
+ return 1;
+ }
+ }
+
+ clear_rflags_cf_zf(vcpu);
+ skip_emulated_instruction(vcpu);
+ return 1;
+}
+
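+/*
+ * Emulate VMPTRST: store the current nested vmcs pointer into the
+ * guest memory operand.
+ */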
+static int handle_vmptrst(struct kvm_vcpu *vcpu)
+{
+ int r = 0;
+ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ gva_t vmcs_gva;
+
+ if (!nested_vmx_check_permission(vcpu))
+ return 1;
+ vmcs_gva = get_vmx_mem_address(vcpu, exit_qualification,
+ vmx_instruction_info);
+
+ r = kvm_write_guest_virt(vmcs_gva,
+ (void *)&to_vmx(vcpu)->nested.current_vmptr,
+ sizeof(u64), vcpu);
+ if (r) {
+ printk(KERN_INFO "%s failed to write vmptr\n", __func__);
+ return 1;
+ }
+ clear_rflags_cf_zf(vcpu);
+ skip_emulated_instruction(vcpu);
+ return 1;
+}
+
static int handle_invlpg(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3930,8 +4204,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_VMCALL] = handle_vmcall,
[EXIT_REASON_VMCLEAR] = handle_vmclear,
[EXIT_REASON_VMLAUNCH] = handle_vmx_insn,
- [EXIT_REASON_VMPTRLD] = handle_vmx_insn,
- [EXIT_REASON_VMPTRST] = handle_vmx_insn,
+ [EXIT_REASON_VMPTRLD] = handle_vmptrld,
+ [EXIT_REASON_VMPTRST] = handle_vmptrst,
[EXIT_REASON_VMREAD] = handle_vmx_insn,
[EXIT_REASON_VMRESUME] = handle_vmx_insn,
[EXIT_REASON_VMWRITE] = handle_vmx_insn,
@@ -2804,8 +2804,8 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
-static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
- struct kvm_vcpu *vcpu)
+int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu)
{
void *data = val;
int r = X86EMUL_CONTINUE;
@@ -2833,7 +2833,7 @@ static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
out:
return r;
}
-
+EXPORT_SYMBOL_GPL(kvm_write_guest_virt);
+
static int emulator_read_emulated(unsigned long addr,
void *val,
@@ -38,6 +38,9 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu);
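+/* Shared with the nested VMX code in vmx.c (VMPTRST emulation). */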
+int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu);
+
extern int nested;
#endif