From patchwork Sat Apr 9 10:07:11 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pekka Enberg X-Patchwork-Id: 695751 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p39A7ZZS013323 for ; Sat, 9 Apr 2011 10:07:35 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753964Ab1DIKHS (ORCPT ); Sat, 9 Apr 2011 06:07:18 -0400 Received: from filtteri1.pp.htv.fi ([213.243.153.184]:56696 "EHLO filtteri1.pp.htv.fi" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753919Ab1DIKHQ (ORCPT ); Sat, 9 Apr 2011 06:07:16 -0400 Received: from localhost (localhost [127.0.0.1]) by filtteri1.pp.htv.fi (Postfix) with ESMTP id EE1B58BBAB; Sat, 9 Apr 2011 13:07:14 +0300 (EEST) X-Virus-Scanned: Debian amavisd-new at pp.htv.fi Received: from smtp5.welho.com ([213.243.153.39]) by localhost (filtteri1.pp.htv.fi [213.243.153.184]) (amavisd-new, port 10024) with ESMTP id yCAQ6n5yHbP4; Sat, 9 Apr 2011 13:07:14 +0300 (EEST) Received: from localhost.localdomain (cs181148025.pp.htv.fi [82.181.148.25]) by smtp5.welho.com (Postfix) with ESMTP id 282CF5BC005; Sat, 9 Apr 2011 13:07:14 +0300 (EEST) From: Pekka Enberg To: kvm@vger.kernel.org Cc: Pekka Enberg , Asias He , Cyrill Gorcunov , Ingo Molnar Subject: [PATCH] kvm tools: Introduce KVM VCPU data structure Date: Sat, 9 Apr 2011 13:07:11 +0300 Message-Id: <1302343631-30060-1-git-send-email-penberg@kernel.org> X-Mailer: git-send-email 1.7.0.4 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Sat, 09 Apr 2011 10:07:36 +0000 (UTC) In preparation for threaded execution model, this patch introduces a KVM VCPU data structure 'struct kvm_cpu'. 
Cc: Asias He Cc: Cyrill Gorcunov Cc: Ingo Molnar Signed-off-by: Pekka Enberg --- tools/kvm/Makefile | 1 + tools/kvm/cpuid.c | 6 +- tools/kvm/include/kvm/kvm-cpu.h | 33 ++++ tools/kvm/include/kvm/kvm.h | 16 -- tools/kvm/ioport.c | 2 + tools/kvm/kvm-cpu.c | 370 +++++++++++++++++++++++++++++++++++++++ tools/kvm/kvm-run.c | 62 ++++--- tools/kvm/kvm.c | 338 ----------------------------------- 8 files changed, 445 insertions(+), 383 deletions(-) create mode 100644 tools/kvm/include/kvm/kvm-cpu.h create mode 100644 tools/kvm/kvm-cpu.c diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile index 4bcfd74..141cdec 100644 --- a/tools/kvm/Makefile +++ b/tools/kvm/Makefile @@ -21,6 +21,7 @@ OBJS += disk-image.o OBJS += interrupt.o OBJS += ioport.o OBJS += kvm.o +OBJS += kvm-cpu.o OBJS += main.o OBJS += mmio.o OBJS += pci.o diff --git a/tools/kvm/cpuid.c b/tools/kvm/cpuid.c index f7cc930..0b26eb1 100644 --- a/tools/kvm/cpuid.c +++ b/tools/kvm/cpuid.c @@ -1,3 +1,5 @@ +#include "kvm/kvm-cpu.h" + #include "kvm/kvm.h" #include "kvm/util.h" @@ -30,14 +32,14 @@ static void filter_cpuid(struct kvm_cpuid2 *kvm_cpuid) } } -void kvm__setup_cpuid(struct kvm *self) +void kvm_cpu__setup_cpuid(struct kvm_cpu *self) { struct kvm_cpuid2 *kvm_cpuid; kvm_cpuid = calloc(1, sizeof(*kvm_cpuid) + MAX_KVM_CPUID_ENTRIES * sizeof(*kvm_cpuid->entries)); kvm_cpuid->nent = MAX_KVM_CPUID_ENTRIES; - if (ioctl(self->sys_fd, KVM_GET_SUPPORTED_CPUID, kvm_cpuid) < 0) + if (ioctl(self->kvm->sys_fd, KVM_GET_SUPPORTED_CPUID, kvm_cpuid) < 0) die_perror("KVM_GET_SUPPORTED_CPUID failed"); filter_cpuid(kvm_cpuid); diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h new file mode 100644 index 0000000..d36dadf --- /dev/null +++ b/tools/kvm/include/kvm/kvm-cpu.h @@ -0,0 +1,33 @@ +#ifndef KVM__KVM_CPU_H +#define KVM__KVM_CPU_H + +#include /* for struct kvm_regs */ + +#include + +struct kvm; + +struct kvm_cpu { + struct kvm *kvm; /* parent KVM */ + int vcpu_fd; /* For VCPU ioctls() */ + struct 
kvm_run *kvm_run; + + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_fpu fpu; + + struct kvm_msrs *msrs; /* dynamically allocated */ +}; + +struct kvm_cpu *kvm_cpu__init(struct kvm *kvm); +void kvm_cpu__delete(struct kvm_cpu *self); +void kvm_cpu__reset_vcpu(struct kvm_cpu *self); +void kvm_cpu__setup_cpuid(struct kvm_cpu *self); +void kvm_cpu__enable_singlestep(struct kvm_cpu *self); +void kvm_cpu__run(struct kvm_cpu *self); + +void kvm_cpu__show_code(struct kvm_cpu *self); +void kvm_cpu__show_registers(struct kvm_cpu *self); +void kvm_cpu__show_page_tables(struct kvm_cpu *self); + +#endif /* KVM__KVM_CPU_H */ diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h index a099307..7af98f9 100644 --- a/tools/kvm/include/kvm/kvm.h +++ b/tools/kvm/include/kvm/kvm.h @@ -3,8 +3,6 @@ #include "kvm/interrupt.h" -#include /* for struct kvm_regs */ - #include #include #include @@ -12,9 +10,7 @@ struct kvm { int sys_fd; /* For system ioctls(), i.e. /dev/kvm */ int vm_fd; /* For VM ioctls() */ - int vcpu_fd; /* For VCPU ioctls() */ timer_t timerid; /* Posix timer for interrupts */ - struct kvm_run *kvm_run; struct disk_image *disk_image; uint64_t ram_size; @@ -26,25 +22,16 @@ struct kvm { uint16_t boot_ip; uint16_t boot_sp; - struct kvm_regs regs; - struct kvm_sregs sregs; - struct kvm_fpu fpu; - struct kvm_msrs *msrs; /* dynamically allocated */ - struct interrupt_table interrupt_table; }; struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size); void kvm__delete(struct kvm *self); -void kvm__setup_cpuid(struct kvm *self); -void kvm__enable_singlestep(struct kvm *self); bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename, const char *initrd_filename, const char *kernel_cmdline); -void kvm__reset_vcpu(struct kvm *self); void kvm__setup_bios(struct kvm *self); void kvm__start_timer(struct kvm *self); void kvm__stop_timer(struct kvm *self); -void kvm__run(struct kvm *self); void kvm__irq_line(struct kvm *self, int irq, int 
level); bool kvm__emulate_io(struct kvm *self, uint16_t port, void *data, int direction, int size, uint32_t count); bool kvm__emulate_mmio(struct kvm *self, uint64_t phys_addr, uint8_t *data, uint32_t len, uint8_t is_write); @@ -52,9 +39,6 @@ bool kvm__emulate_mmio(struct kvm *self, uint64_t phys_addr, uint8_t *data, uint /* * Debugging */ -void kvm__show_code(struct kvm *self); -void kvm__show_registers(struct kvm *self); -void kvm__show_page_tables(struct kvm *self); void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size); extern const char *kvm_exit_reasons[]; diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c index 4579e89..6303571 100644 --- a/tools/kvm/ioport.c +++ b/tools/kvm/ioport.c @@ -2,6 +2,8 @@ #include "kvm/kvm.h" +#include /* for KVM_EXIT_* */ + #include #include #include diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c new file mode 100644 index 0000000..374adb2 --- /dev/null +++ b/tools/kvm/kvm-cpu.c @@ -0,0 +1,370 @@ +#include "kvm/kvm-cpu.h" + +#include "kvm/util.h" +#include "kvm/kvm.h" + +#include +#include +#include +#include +#include + +static inline bool is_in_protected_mode(struct kvm_cpu *self) +{ + return self->sregs.cr0 & 0x01; +} + +static inline uint64_t ip_to_flat(struct kvm_cpu *self, uint64_t ip) +{ + uint64_t cs; + + /* + * NOTE! We should take code segment base address into account here. + * Luckily it's usually zero because Linux uses flat memory model. + */ + if (is_in_protected_mode(self)) + return ip; + + cs = self->sregs.cs.selector; + + return ip + (cs << 4); +} + +static inline uint32_t selector_to_base(uint16_t selector) +{ + /* + * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 
+ */ + return (uint32_t)selector * 16; +} + +static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) +{ + struct kvm_cpu *self; + + self = calloc(1, sizeof *self); + if (!self) + return NULL; + + self->kvm = kvm; + + return self; +} + +void kvm_cpu__delete(struct kvm_cpu *self) +{ + if (self->msrs) + free(self->msrs); + + free(self); +} + +struct kvm_cpu *kvm_cpu__init(struct kvm *kvm) +{ + struct kvm_cpu *self; + int mmap_size; + + self = kvm_cpu__new(kvm); + if (!self) + return NULL; + + self->vcpu_fd = ioctl(self->kvm->vm_fd, KVM_CREATE_VCPU, 0); + if (self->vcpu_fd < 0) + die_perror("KVM_CREATE_VCPU ioctl"); + + mmap_size = ioctl(self->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); + if (mmap_size < 0) + die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); + + self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); + if (self->kvm_run == MAP_FAILED) + die("unable to mmap vcpu fd"); + + return self; +} + +void kvm_cpu__enable_singlestep(struct kvm_cpu *self) +{ + struct kvm_guest_debug debug = { + .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, + }; + + if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0) + warning("KVM_SET_GUEST_DEBUG failed"); +} + +static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) +{ + struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs)); + + if (!self) + die("out of memory"); + + return self; +} + +#define MSR_IA32_TIME_STAMP_COUNTER 0x10 + +#define MSR_IA32_SYSENTER_CS 0x174 +#define MSR_IA32_SYSENTER_ESP 0x175 +#define MSR_IA32_SYSENTER_EIP 0x176 + +#define MSR_IA32_STAR 0xc0000081 +#define MSR_IA32_LSTAR 0xc0000082 +#define MSR_IA32_CSTAR 0xc0000083 +#define MSR_IA32_FMASK 0xc0000084 +#define MSR_IA32_KERNEL_GS_BASE 0xc0000102 + +#define KVM_MSR_ENTRY(_index, _data) \ + (struct kvm_msr_entry) { .index = _index, .data = _data } + +static void kvm_cpu__setup_msrs(struct kvm_cpu *self) +{ + unsigned long ndx = 0; + + self->msrs = kvm_msrs__new(100); + + 
self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); + self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); + self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); +#ifdef CONFIG_X86_64 + self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR, 0x0); + self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR, 0x0); + self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE, 0x0); + self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK, 0x0); + self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR, 0x0); +#endif + self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER, 0x0); + + self->msrs->nmsrs = ndx; + + if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0) + die_perror("KVM_SET_MSRS failed"); +} + +static void kvm_cpu__setup_fpu(struct kvm_cpu *self) +{ + self->fpu = (struct kvm_fpu) { + .fcw = 0x37f, + .mxcsr = 0x1f80, + }; + + if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0) + die_perror("KVM_SET_FPU failed"); +} + +static void kvm_cpu__setup_regs(struct kvm_cpu *self) +{ + self->regs = (struct kvm_regs) { + /* We start the guest in 16-bit real mode */ + .rflags = 0x0000000000000002ULL, + + .rip = self->kvm->boot_ip, + .rsp = self->kvm->boot_sp, + .rbp = self->kvm->boot_sp, + }; + + if (self->regs.rip > USHRT_MAX) + die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip); + + if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0) + die_perror("KVM_SET_REGS failed"); +} + +static void kvm_cpu__setup_sregs(struct kvm_cpu *self) +{ + + if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) + die_perror("KVM_GET_SREGS failed"); + + self->sregs.cs.selector = self->kvm->boot_selector; + self->sregs.cs.base = selector_to_base(self->kvm->boot_selector); + self->sregs.ss.selector = self->kvm->boot_selector; + self->sregs.ss.base = selector_to_base(self->kvm->boot_selector); + self->sregs.ds.selector = self->kvm->boot_selector; + 
self->sregs.ds.base = selector_to_base(self->kvm->boot_selector); + self->sregs.es.selector = self->kvm->boot_selector; + self->sregs.es.base = selector_to_base(self->kvm->boot_selector); + self->sregs.fs.selector = self->kvm->boot_selector; + self->sregs.fs.base = selector_to_base(self->kvm->boot_selector); + self->sregs.gs.selector = self->kvm->boot_selector; + self->sregs.gs.base = selector_to_base(self->kvm->boot_selector); + + if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0) + die_perror("KVM_SET_SREGS failed"); +} + +/** + * kvm_cpu__reset_vcpu - reset virtual CPU to a known state + */ +void kvm_cpu__reset_vcpu(struct kvm_cpu *self) +{ + kvm_cpu__setup_sregs(self); + kvm_cpu__setup_regs(self); + kvm_cpu__setup_fpu(self); + kvm_cpu__setup_msrs(self); +} + +static void print_dtable(const char *name, struct kvm_dtable *dtable) +{ + printf(" %s %016" PRIx64 " %08" PRIx16 "\n", + name, (uint64_t) dtable->base, (uint16_t) dtable->limit); +} + +static void print_segment(const char *name, struct kvm_segment *seg) +{ + printf(" %s %04" PRIx16 " %016" PRIx64 " %08" PRIx32 " %02" PRIx8 " %x %x %x %x %x %x %x\n", + name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit, + (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); +} + +void kvm_cpu__show_registers(struct kvm_cpu *self) +{ + unsigned long cr0, cr2, cr3; + unsigned long cr4, cr8; + unsigned long rax, rbx, rcx; + unsigned long rdx, rsi, rdi; + unsigned long rbp, r8, r9; + unsigned long r10, r11, r12; + unsigned long r13, r14, r15; + unsigned long rip, rsp; + struct kvm_sregs sregs; + unsigned long rflags; + struct kvm_regs regs; + int i; + + if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0) + die("KVM_GET_REGS failed"); + + rflags = regs.rflags; + + rip = regs.rip; rsp = regs.rsp; + rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; + rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; + rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; + r10 = regs.r10; r11 = 
regs.r11; r12 = regs.r12; + r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; + + printf("Registers:\n"); + printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); + printf(" rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); + printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); + printf(" rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); + printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); + printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); + + if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) + die("KVM_GET_REGS failed"); + + cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; + cr4 = sregs.cr4; cr8 = sregs.cr8; + + printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); + printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8); + printf("Segment registers:\n"); + printf(" register selector base limit type p dpl db s l g avl\n"); + print_segment("cs ", &sregs.cs); + print_segment("ss ", &sregs.ss); + print_segment("ds ", &sregs.ds); + print_segment("es ", &sregs.es); + print_segment("fs ", &sregs.fs); + print_segment("gs ", &sregs.gs); + print_segment("tr ", &sregs.tr); + print_segment("ldt", &sregs.ldt); + print_dtable("gdt", &sregs.gdt); + print_dtable("idt", &sregs.idt); + printf(" [ efer: %016" PRIx64 " apic base: %016" PRIx64 " nmi: %s ]\n", + (uint64_t) sregs.efer, (uint64_t) sregs.apic_base, + (self->kvm->nmi_disabled ? 
"disabled" : "enabled")); + printf("Interrupt bitmap:\n"); + printf(" "); + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) + printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]); + printf("\n"); +} + +void kvm_cpu__show_code(struct kvm_cpu *self) +{ + unsigned int code_bytes = 64; + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; + unsigned char c; + unsigned int i; + uint8_t *ip; + + if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0) + die("KVM_GET_REGS failed"); + + if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) + die("KVM_GET_SREGS failed"); + + ip = guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip) - code_prologue); + + printf("Code: "); + + for (i = 0; i < code_len; i++, ip++) { + if (!host_ptr_in_ram(self->kvm, ip)) + break; + + c = *ip; + + if (ip == guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip))) + printf("<%02x> ", c); + else + printf("%02x ", c); + } + + printf("\n"); + + printf("Stack:\n"); + kvm__dump_mem(self->kvm, self->regs.rsp, 32); +} + +void kvm_cpu__show_page_tables(struct kvm_cpu *self) +{ + uint64_t *pte1; + uint64_t *pte2; + uint64_t *pte3; + uint64_t *pte4; + + if (!is_in_protected_mode(self)) + return; + + if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) + die("KVM_GET_SREGS failed"); + + pte4 = guest_flat_to_host(self->kvm, self->sregs.cr3); + if (!host_ptr_in_ram(self->kvm, pte4)) + return; + + pte3 = guest_flat_to_host(self->kvm, (*pte4 & ~0xfff)); + if (!host_ptr_in_ram(self->kvm, pte3)) + return; + + pte2 = guest_flat_to_host(self->kvm, (*pte3 & ~0xfff)); + if (!host_ptr_in_ram(self->kvm, pte2)) + return; + + pte1 = guest_flat_to_host(self->kvm, (*pte2 & ~0xfff)); + if (!host_ptr_in_ram(self->kvm, pte1)) + return; + + printf("Page Tables:\n"); + if (*pte2 & (1 << 7)) + printf(" pte4: %016" PRIx64 " pte3: %016" PRIx64 + " pte2: %016" PRIx64 "\n", + *pte4, *pte3, *pte2); + else + printf(" pte4: %016" PRIx64 " pte3: %016" 
PRIx64 " pte2: %016" + PRIx64 " pte1: %016" PRIx64 "\n", + *pte4, *pte3, *pte2, *pte1); +} + +void kvm_cpu__run(struct kvm_cpu *self) +{ + int err; + + err = ioctl(self->vcpu_fd, KVM_RUN, 0); + if (err && (errno != EINTR && errno != EAGAIN)) + die_perror("KVM_RUN failed"); +} diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c index 9b0786a..9392818 100644 --- a/tools/kvm/kvm-run.c +++ b/tools/kvm/kvm-run.c @@ -10,6 +10,7 @@ /* user defined header files */ #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #define MIN_RAM_SIZE_BYTE (MIN_RAM_SIZE_MB << MB_SHIFT) static struct kvm *kvm; +static struct kvm_cpu *cpu; static void handle_sigint(int sig) { @@ -37,10 +39,11 @@ static void handle_sigint(int sig) static void handle_sigquit(int sig) { - kvm__show_registers(kvm); - kvm__show_code(kvm); - kvm__show_page_tables(kvm); + kvm_cpu__show_registers(cpu); + kvm_cpu__show_code(cpu); + kvm_cpu__show_page_tables(cpu); + kvm_cpu__delete(cpu); kvm__delete(kvm); exit(1); @@ -130,13 +133,17 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) kvm = kvm__init(kvm_dev, ram_size); + cpu = kvm_cpu__init(kvm); + if (!cpu) + die("unable to initialize KVM VCPU"); + if (image_filename) { kvm->disk_image = disk_image__open(image_filename); if (!kvm->disk_image) die("unable to load disk image %s", image_filename); } - kvm__setup_cpuid(kvm); + kvm_cpu__setup_cpuid(cpu); strcpy(real_cmdline, "notsc nolapic noacpi pci=conf1 console=ttyS0 "); if (!kernel_cmdline || !strstr(kernel_cmdline, "root=")) { @@ -153,12 +160,12 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) real_cmdline)) die("unable to load kernel %s", kernel_filename); - kvm__reset_vcpu(kvm); + kvm_cpu__reset_vcpu(cpu); kvm__setup_bios(kvm); if (single_step) - kvm__enable_singlestep(kvm); + kvm_cpu__enable_singlestep(cpu); serial8250__init(kvm); @@ -171,23 +178,23 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) kvm__start_timer(kvm); for (;;) { - 
kvm__run(kvm); + kvm_cpu__run(cpu); - switch (kvm->kvm_run->exit_reason) { + switch (cpu->kvm_run->exit_reason) { case KVM_EXIT_DEBUG: - kvm__show_registers(kvm); - kvm__show_code(kvm); + kvm_cpu__show_registers(cpu); + kvm_cpu__show_code(cpu); break; case KVM_EXIT_IO: { bool ret; ret = kvm__emulate_io(kvm, - kvm->kvm_run->io.port, - (uint8_t *)kvm->kvm_run + - kvm->kvm_run->io.data_offset, - kvm->kvm_run->io.direction, - kvm->kvm_run->io.size, - kvm->kvm_run->io.count); + cpu->kvm_run->io.port, + (uint8_t *)cpu->kvm_run + + cpu->kvm_run->io.data_offset, + cpu->kvm_run->io.direction, + cpu->kvm_run->io.size, + cpu->kvm_run->io.count); if (!ret) goto panic_kvm; @@ -197,10 +204,10 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) bool ret; ret = kvm__emulate_mmio(kvm, - kvm->kvm_run->mmio.phys_addr, - kvm->kvm_run->mmio.data, - kvm->kvm_run->mmio.len, - kvm->kvm_run->mmio.is_write); + cpu->kvm_run->mmio.phys_addr, + cpu->kvm_run->mmio.data, + cpu->kvm_run->mmio.len, + cpu->kvm_run->mmio.is_write); if (!ret) goto panic_kvm; @@ -227,15 +234,16 @@ exit_kvm: panic_kvm: fprintf(stderr, "KVM exit reason: %" PRIu32 " (\"%s\")\n", - kvm->kvm_run->exit_reason, - kvm_exit_reasons[kvm->kvm_run->exit_reason]); - if (kvm->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) + cpu->kvm_run->exit_reason, + kvm_exit_reasons[cpu->kvm_run->exit_reason]); + if (cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) fprintf(stderr, "KVM exit code: 0x%" PRIu64 "\n", - kvm->kvm_run->hw.hardware_exit_reason); + cpu->kvm_run->hw.hardware_exit_reason); disk_image__close(kvm->disk_image); - kvm__show_registers(kvm); - kvm__show_code(kvm); - kvm__show_page_tables(kvm); + kvm_cpu__show_registers(cpu); + kvm_cpu__show_code(cpu); + kvm_cpu__show_page_tables(cpu); + kvm_cpu__delete(cpu); kvm__delete(kvm); return 1; diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c index 2cd206d..af1f65f 100644 --- a/tools/kvm/kvm.c +++ b/tools/kvm/kvm.c @@ -107,9 +107,6 @@ void kvm__delete(struct kvm *self) { 
kvm__stop_timer(self); - if (self->msrs) - free(self->msrs); - free(self->ram_start); free(self); } @@ -162,7 +159,6 @@ struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size) struct kvm_pit_config pit_config = { .flags = 0, }; struct kvm *self; long page_size; - int mmap_size; int ret; if (!kvm__cpu_supports_vm()) @@ -222,31 +218,9 @@ struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size) if (ret < 0) die_perror("KVM_CREATE_IRQCHIP ioctl"); - self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0); - if (self->vcpu_fd < 0) - die_perror("KVM_CREATE_VCPU ioctl"); - - mmap_size = ioctl(self->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); - if (mmap_size < 0) - die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); - - self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); - if (self->kvm_run == MAP_FAILED) - die("unable to mmap vcpu fd"); - return self; } -void kvm__enable_singlestep(struct kvm *self) -{ - struct kvm_guest_debug debug = { - .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, - }; - - if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0) - warning("KVM_SET_GUEST_DEBUG failed"); -} - #define BOOT_LOADER_SELECTOR 0x1000 #define BOOT_LOADER_IP 0x0000 #define BOOT_LOADER_SP 0x8000 @@ -417,154 +391,6 @@ found_kernel: return ret; } -static inline uint64_t ip_flat_to_real(struct kvm *self, uint64_t ip) -{ - uint64_t cs = self->sregs.cs.selector; - - return ip - (cs << 4); -} - -static inline bool is_in_protected_mode(struct kvm *self) -{ - return self->sregs.cr0 & 0x01; -} - -static inline uint64_t ip_to_flat(struct kvm *self, uint64_t ip) -{ - uint64_t cs; - - /* - * NOTE! We should take code segment base address into account here. - * Luckily it's usually zero because Linux uses flat memory model. 
- */ - if (is_in_protected_mode(self)) - return ip; - - cs = self->sregs.cs.selector; - - return ip + (cs << 4); -} - -static inline uint32_t selector_to_base(uint16_t selector) -{ - /* - * KVM on Intel requires 'base' to be 'selector * 16' in real mode. - */ - return (uint32_t)selector * 16; -} - -static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) -{ - struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs)); - - if (!self) - die("out of memory"); - - return self; -} - -#define MSR_IA32_TIME_STAMP_COUNTER 0x10 - -#define MSR_IA32_SYSENTER_CS 0x174 -#define MSR_IA32_SYSENTER_ESP 0x175 -#define MSR_IA32_SYSENTER_EIP 0x176 - -#define MSR_IA32_STAR 0xc0000081 -#define MSR_IA32_LSTAR 0xc0000082 -#define MSR_IA32_CSTAR 0xc0000083 -#define MSR_IA32_FMASK 0xc0000084 -#define MSR_IA32_KERNEL_GS_BASE 0xc0000102 - -#define KVM_MSR_ENTRY(_index, _data) \ - (struct kvm_msr_entry) { .index = _index, .data = _data } - -static void kvm__setup_msrs(struct kvm *self) -{ - unsigned long ndx = 0; - - self->msrs = kvm_msrs__new(100); - - self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); - self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); - self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); -#ifdef CONFIG_X86_64 - self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR, 0x0); - self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR, 0x0); - self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE, 0x0); - self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK, 0x0); - self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR, 0x0); -#endif - self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER, 0x0); - - self->msrs->nmsrs = ndx; - - if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0) - die_perror("KVM_SET_MSRS failed"); -} - -static void kvm__setup_fpu(struct kvm *self) -{ - self->fpu = (struct kvm_fpu) { - .fcw = 0x37f, - .mxcsr = 0x1f80, - }; - - if 
(ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0) - die_perror("KVM_SET_FPU failed"); -} - -static void kvm__setup_regs(struct kvm *self) -{ - self->regs = (struct kvm_regs) { - /* We start the guest in 16-bit real mode */ - .rflags = 0x0000000000000002ULL, - - .rip = self->boot_ip, - .rsp = self->boot_sp, - .rbp = self->boot_sp, - }; - - if (self->regs.rip > USHRT_MAX) - die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip); - - if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0) - die_perror("KVM_SET_REGS failed"); -} - -static void kvm__setup_sregs(struct kvm *self) -{ - - if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) - die_perror("KVM_GET_SREGS failed"); - - self->sregs.cs.selector = self->boot_selector; - self->sregs.cs.base = selector_to_base(self->boot_selector); - self->sregs.ss.selector = self->boot_selector; - self->sregs.ss.base = selector_to_base(self->boot_selector); - self->sregs.ds.selector = self->boot_selector; - self->sregs.ds.base = selector_to_base(self->boot_selector); - self->sregs.es.selector = self->boot_selector; - self->sregs.es.base = selector_to_base(self->boot_selector); - self->sregs.fs.selector = self->boot_selector; - self->sregs.fs.base = selector_to_base(self->boot_selector); - self->sregs.gs.selector = self->boot_selector; - self->sregs.gs.base = selector_to_base(self->boot_selector); - - if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0) - die_perror("KVM_SET_SREGS failed"); -} - -/** - * kvm__reset_vcpu - reset virtual CPU to a known state - */ -void kvm__reset_vcpu(struct kvm *self) -{ - kvm__setup_sregs(self); - kvm__setup_regs(self); - kvm__setup_fpu(self); - kvm__setup_msrs(self); -} - /** * kvm__setup_bios - inject BIOS into guest system memory * @self - guest system descriptor @@ -629,15 +455,6 @@ void kvm__stop_timer(struct kvm *self) self->timerid = 0; } -void kvm__run(struct kvm *self) -{ - int err; - - err = ioctl(self->vcpu_fd, KVM_RUN, 0); - if (err && (errno != 
EINTR && errno != EAGAIN)) - die_perror("KVM_RUN failed"); -} - void kvm__irq_line(struct kvm *self, int irq, int level) { struct kvm_irq_level irq_level; @@ -653,161 +470,6 @@ void kvm__irq_line(struct kvm *self, int irq, int level) die_perror("KVM_IRQ_LINE failed"); } -static void print_dtable(const char *name, struct kvm_dtable *dtable) -{ - printf(" %s %016" PRIx64 " %08" PRIx16 "\n", - name, (uint64_t) dtable->base, (uint16_t) dtable->limit); -} - -static void print_segment(const char *name, struct kvm_segment *seg) -{ - printf(" %s %04" PRIx16 " %016" PRIx64 " %08" PRIx32 " %02" PRIx8 " %x %x %x %x %x %x %x\n", - name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit, - (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); -} - -void kvm__show_registers(struct kvm *self) -{ - unsigned long cr0, cr2, cr3; - unsigned long cr4, cr8; - unsigned long rax, rbx, rcx; - unsigned long rdx, rsi, rdi; - unsigned long rbp, r8, r9; - unsigned long r10, r11, r12; - unsigned long r13, r14, r15; - unsigned long rip, rsp; - struct kvm_sregs sregs; - unsigned long rflags; - struct kvm_regs regs; - int i; - - if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0) - die("KVM_GET_REGS failed"); - - rflags = regs.rflags; - - rip = regs.rip; rsp = regs.rsp; - rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; - rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; - rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; - r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; - r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; - - printf("Registers:\n"); - printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); - printf(" rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); - printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); - printf(" rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); - printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); - printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); - - if 
(ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) - die("KVM_GET_REGS failed"); - - cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; - cr4 = sregs.cr4; cr8 = sregs.cr8; - - printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); - printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8); - printf("Segment registers:\n"); - printf(" register selector base limit type p dpl db s l g avl\n"); - print_segment("cs ", &sregs.cs); - print_segment("ss ", &sregs.ss); - print_segment("ds ", &sregs.ds); - print_segment("es ", &sregs.es); - print_segment("fs ", &sregs.fs); - print_segment("gs ", &sregs.gs); - print_segment("tr ", &sregs.tr); - print_segment("ldt", &sregs.ldt); - print_dtable("gdt", &sregs.gdt); - print_dtable("idt", &sregs.idt); - printf(" [ efer: %016" PRIx64 " apic base: %016" PRIx64 " nmi: %s ]\n", - (uint64_t) sregs.efer, (uint64_t) sregs.apic_base, - (self->nmi_disabled ? "disabled" : "enabled")); - printf("Interrupt bitmap:\n"); - printf(" "); - for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) - printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]); - printf("\n"); -} - -void kvm__show_code(struct kvm *self) -{ - unsigned int code_bytes = 64; - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - unsigned int i; - uint8_t *ip; - - if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0) - die("KVM_GET_REGS failed"); - - if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) - die("KVM_GET_SREGS failed"); - - ip = guest_flat_to_host(self, ip_to_flat(self, self->regs.rip) - code_prologue); - - printf("Code: "); - - for (i = 0; i < code_len; i++, ip++) { - if (!host_ptr_in_ram(self, ip)) - break; - - c = *ip; - - if (ip == guest_flat_to_host(self, ip_to_flat(self, self->regs.rip))) - printf("<%02x> ", c); - else - printf("%02x ", c); - } - - printf("\n"); - - printf("Stack:\n"); - kvm__dump_mem(self, self->regs.rsp, 32); -} - -void kvm__show_page_tables(struct kvm *self) -{ - uint64_t 
*pte1; - uint64_t *pte2; - uint64_t *pte3; - uint64_t *pte4; - - if (!is_in_protected_mode(self)) - return; - - if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) - die("KVM_GET_SREGS failed"); - - pte4 = guest_flat_to_host(self, self->sregs.cr3); - if (!host_ptr_in_ram(self, pte4)) - return; - - pte3 = guest_flat_to_host(self, (*pte4 & ~0xfff)); - if (!host_ptr_in_ram(self, pte3)) - return; - - pte2 = guest_flat_to_host(self, (*pte3 & ~0xfff)); - if (!host_ptr_in_ram(self, pte2)) - return; - - pte1 = guest_flat_to_host(self, (*pte2 & ~0xfff)); - if (!host_ptr_in_ram(self, pte1)) - return; - - printf("Page Tables:\n"); - if (*pte2 & (1 << 7)) - printf(" pte4: %016" PRIx64 " pte3: %016" PRIx64 - " pte2: %016" PRIx64 "\n", - *pte4, *pte3, *pte2); - else - printf(" pte4: %016" PRIx64 " pte3: %016" PRIx64 " pte2: %016" - PRIx64 " pte1: %016" PRIx64 "\n", - *pte4, *pte3, *pte2, *pte1); -} - void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size) { unsigned char *p;