--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -142,6 +142,7 @@ bool kvm_msi_via_irqfd_allowed;
bool kvm_gsi_routing_allowed;
bool kvm_gsi_direct_mapping;
bool kvm_allowed;
+bool kvm_sregs2;
bool kvm_readonly_mem_allowed;
bool kvm_vm_attributes_allowed;
bool kvm_direct_msi_allowed;
@@ -2186,6 +2187,9 @@ static int kvm_init(MachineState *ms)
kvm_ioeventfd_any_length_allowed =
(kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);
+ kvm_sregs2 =
+ (kvm_check_extension(s, KVM_CAP_SREGS2) > 0);
+
kvm_state = s;
ret = kvm_arch_init(ms, s);
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -32,6 +32,7 @@
#ifdef CONFIG_KVM_IS_POSSIBLE
extern bool kvm_allowed;
+extern bool kvm_sregs2;
extern bool kvm_kernel_irqchip;
extern bool kvm_split_irqchip;
extern bool kvm_async_interrupts_allowed;
@@ -139,6 +140,13 @@ extern bool kvm_msi_use_devid;
*/
#define kvm_gsi_direct_mapping() (kvm_gsi_direct_mapping)
+
+/**
+ * kvm_supports_sregs2:
+ * Returns: true if KVM supports KVM_GET_SREGS2/KVM_SET_SREGS2 (KVM_CAP_SREGS2).
+ */
+#define kvm_supports_sregs2() (kvm_sregs2)
+
/**
* kvm_readonly_mem_enabled:
*
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1422,6 +1422,7 @@ typedef struct CPUX86State {
SegmentCache idt; /* only base and limit are used */
target_ulong cr[5]; /* NOTE: cr1 is unused */
+ uint64_t pdptrs[4];
int32_t a20_mask;
BNDReg bnd_regs[4];
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2514,6 +2514,63 @@ static int kvm_put_sregs(X86CPU *cpu)
return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}
+static int kvm_put_sregs2(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct kvm_sregs2 sregs;
+ int i;
+
+ if ((env->eflags & VM_MASK)) {
+ set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
+ set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
+ set_v8086_seg(&sregs.es, &env->segs[R_ES]);
+ set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
+ set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
+ set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
+ } else {
+ set_seg(&sregs.cs, &env->segs[R_CS]);
+ set_seg(&sregs.ds, &env->segs[R_DS]);
+ set_seg(&sregs.es, &env->segs[R_ES]);
+ set_seg(&sregs.fs, &env->segs[R_FS]);
+ set_seg(&sregs.gs, &env->segs[R_GS]);
+ set_seg(&sregs.ss, &env->segs[R_SS]);
+ }
+
+ set_seg(&sregs.tr, &env->tr);
+ set_seg(&sregs.ldt, &env->ldt);
+
+ sregs.idt.limit = env->idt.limit;
+ sregs.idt.base = env->idt.base;
+ memset(sregs.idt.padding, 0, sizeof sregs.idt.padding);
+ sregs.gdt.limit = env->gdt.limit;
+ sregs.gdt.base = env->gdt.base;
+ memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding);
+
+ sregs.cr0 = env->cr[0];
+ sregs.cr2 = env->cr[2];
+ sregs.cr3 = env->cr[3];
+ sregs.cr4 = env->cr[4];
+
+ sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
+ sregs.apic_base = cpu_get_apic_base(cpu->apic_state);
+
+ sregs.efer = env->efer;
+
+ sregs.flags = 0;
+ sregs.padding = 0;
+ /* The PDPTRs are only meaningful under PAE paging (CR0.PG=1, CR4.PAE=1,
+ * EFER.LMA=0); only advertise them to KVM in that mode. */
+ if ((env->cr[0] & CR0_PG_MASK) && (env->cr[4] & CR4_PAE_MASK) &&
+ !(env->efer & MSR_EFER_LMA)) {
+ for (i = 0; i < 4; i++) {
+ sregs.pdptrs[i] = env->pdptrs[i];
+ }
+ sregs.flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID;
+ }
+
+ return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS2, &sregs);
+}
+
static void kvm_msr_buf_reset(X86CPU *cpu)
{
memset(cpu->kvm_msr_buf, 0, MSR_BUF_SIZE);
@@ -3175,6 +3232,49 @@ static int kvm_get_sregs(X86CPU *cpu)
return 0;
}
+static int kvm_get_sregs2(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct kvm_sregs2 sregs;
+ int i, ret;
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS2, &sregs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ get_seg(&env->segs[R_CS], &sregs.cs);
+ get_seg(&env->segs[R_DS], &sregs.ds);
+ get_seg(&env->segs[R_ES], &sregs.es);
+ get_seg(&env->segs[R_FS], &sregs.fs);
+ get_seg(&env->segs[R_GS], &sregs.gs);
+ get_seg(&env->segs[R_SS], &sregs.ss);
+
+ get_seg(&env->tr, &sregs.tr);
+ get_seg(&env->ldt, &sregs.ldt);
+
+ env->idt.limit = sregs.idt.limit;
+ env->idt.base = sregs.idt.base;
+ env->gdt.limit = sregs.gdt.limit;
+ env->gdt.base = sregs.gdt.base;
+
+ env->cr[0] = sregs.cr0;
+ env->cr[2] = sregs.cr2;
+ env->cr[3] = sregs.cr3;
+ env->cr[4] = sregs.cr4;
+
+ env->efer = sregs.efer;
+
+ for (i = 0; i < 4; i++) {
+ env->pdptrs[i] = sregs.pdptrs[i];
+ }
+
+ /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */
+ x86_update_hflags(env);
+
+ return 0;
+}
+
static int kvm_get_msrs(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
@@ -4000,7 +4100,8 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
/* must be before kvm_put_nested_state so that EFER.SVME is set */
- ret = kvm_put_sregs(x86_cpu);
+ ret = kvm_supports_sregs2() ? kvm_put_sregs2(x86_cpu) :
+ kvm_put_sregs(x86_cpu);
if (ret < 0) {
return ret;
}
@@ -4105,7 +4206,7 @@ int kvm_arch_get_registers(CPUState *cs)
if (ret < 0) {
goto out;
}
- ret = kvm_get_sregs(cpu);
+ ret = kvm_supports_sregs2() ? kvm_get_sregs2(cpu) : kvm_get_sregs(cpu);
if (ret < 0) {
goto out;
}
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1396,6 +1396,37 @@ static const VMStateDescription vmstate_msr_tsx_ctrl = {
}
};
+static bool pdptrs_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+
+ if (!kvm_supports_sregs2()) {
+ return false;
+ }
+
+ if (!(env->cr[0] & CR0_PG_MASK)) {
+ return false;
+ }
+
+ if (env->efer & MSR_EFER_LMA) {
+ return false;
+ }
+
+ return env->cr[4] & CR4_PAE_MASK;
+}
+
+static const VMStateDescription vmstate_pdptrs = {
+ .name = "cpu/pdptrs",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = pdptrs_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64_ARRAY(env.pdptrs, X86CPU, 4),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
VMStateDescription vmstate_x86_cpu = {
.name = "cpu",
.version_id = 12,
@@ -1531,6 +1562,7 @@ VMStateDescription vmstate_x86_cpu = {
&vmstate_nested_state,
#endif
&vmstate_msr_tsx_ctrl,
+ &vmstate_pdptrs,
NULL
}
};
This allows QEMU to make the PDPTRs part of the migration stream and thus
avoid reloading them from guest memory after a migration, which would be
against the X86 spec: under PAE paging the PDPTEs are loaded into internal
registers only on writes to CR0/CR3/CR4, so the copies in guest memory may
no longer match the values the guest is actually running with.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 accel/kvm/kvm-all.c   |   4 ++
 include/sysemu/kvm.h  |   8 ++++
 target/i386/cpu.h     |   1 +
 target/i386/kvm/kvm.c | 105 ++++++++++++++++++++++++++++++++++++++++++++-
 target/i386/machine.c |  31 ++++++++++++++
 5 files changed, 147 insertions(+), 2 deletions(-)
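
For reviewers without the kernel tree at hand: struct kvm_sregs2 mirrors
struct kvm_sregs, but drops interrupt_bitmap and adds flags and pdptrs. The
sketch below is paraphrased from the Linux UAPI header
(arch/x86/include/uapi/asm/kvm.h) that this series builds against; the
padding field is inferred from its use in kvm_put_sregs2() above, so treat
the installed kernel headers as authoritative.

/* Paraphrased sketch of the KVM_GET_SREGS2/KVM_SET_SREGS2 payload; see
 * arch/x86/include/uapi/asm/kvm.h for the authoritative definition. */
struct kvm_sregs2 {
        /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
        struct kvm_segment cs, ds, es, fs, gs, ss;
        struct kvm_segment tr, ldt;
        struct kvm_dtable gdt, idt;
        __u64 cr0, cr2, cr3, cr4, cr8;
        __u64 efer;
        __u64 apic_base;
        __u64 flags;     /* KVM_SREGS2_FLAGS_* */
        __u64 pdptrs[4]; /* used only when the flag below is set */
        __u64 padding;   /* zeroed by kvm_put_sregs2() above */
};

/* On KVM_SET_SREGS2, this flag asks KVM to install pdptrs[] as-is instead
 * of re-reading the PDPTEs from guest memory, which is what the legacy
 * KVM_SET_SREGS path does. */
#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1

Together, the ioctl pair lets QEMU read the cached PDPTE values on the
migration source and install them verbatim on the destination, rather than
letting KVM re-derive them from CR3.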