@@ -331,14 +331,14 @@ static const char *cpuid_6_feature_name[] = {
#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
+ CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \
CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
/* missing:
CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
- CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
- CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
- CPUID_EXT_RDRAND */
+ CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX,
+ CPUID_EXT_F16C, CPUID_EXT_RDRAND */
#ifdef TARGET_X86_64
#define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM)
@@ -440,7 +440,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.cpuid_eax = 0xd,
.cpuid_needs_ecx = true, .cpuid_ecx = 1,
.cpuid_reg = R_EAX,
- .tcg_features = 0,
+ .tcg_features = CPUID_XSAVE_XGETBV1,
},
[FEAT_6_EAX] = {
.feat_names = cpuid_6_feature_name,
@@ -2323,10 +2323,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ebx = (cpu->apic_id << 24) |
8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
*ecx = env->features[FEAT_1_ECX];
+ if ((*ecx & CPUID_EXT_XSAVE) && (env->hflags & HF_OSXSAVE_MASK)) {
+ *ecx |= CPUID_EXT_OSXSAVE;
+ }
*edx = env->features[FEAT_1_EDX];
if (cs->nr_cores * cs->nr_threads > 1) {
*ebx |= (cs->nr_cores * cs->nr_threads) << 16;
- *edx |= 1 << 28; /* HTT bit */
+ *edx |= CPUID_HT;
}
break;
case 2:
@@ -2450,7 +2453,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0xD: {
KVMState *s = cs->kvm_state;
- uint64_t kvm_mask;
+ uint64_t ena_mask;
int i;
/* Processor Extended State */
@@ -2458,35 +2461,40 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ebx = 0;
*ecx = 0;
*edx = 0;
- if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) || !kvm_enabled()) {
+ if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
break;
}
- kvm_mask =
- kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX) |
- ((uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32);
+ if (kvm_enabled()) {
+ ena_mask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX);
+ ena_mask <<= 32;
+ ena_mask |= kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
+ } else {
+ ena_mask = -1;
+ }
if (count == 0) {
*ecx = 0x240;
for (i = 2; i < ARRAY_SIZE(ext_save_areas); i++) {
const ExtSaveArea *esa = &ext_save_areas[i];
- if ((env->features[esa->feature] & esa->bits) == esa->bits &&
- (kvm_mask & (1 << i)) != 0) {
+ if (esa->bits
+ && (env->features[esa->feature] & esa->bits) == esa->bits
+ && ((ena_mask >> i) & 1) != 0) {
if (i < 32) {
- *eax |= 1 << i;
+ *eax |= 1u << i;
} else {
- *edx |= 1 << (i - 32);
+ *edx |= 1u << (i - 32);
}
*ecx = MAX(*ecx, esa->offset + esa->size);
}
}
- *eax |= kvm_mask & (XSTATE_FP | XSTATE_SSE);
+ *eax |= ena_mask & (XSTATE_FP | XSTATE_SSE);
*ebx = *ecx;
} else if (count == 1) {
*eax = env->features[FEAT_XSAVE];
} else if (count < ARRAY_SIZE(ext_save_areas)) {
const ExtSaveArea *esa = &ext_save_areas[count];
- if ((env->features[esa->feature] & esa->bits) == esa->bits &&
- (kvm_mask & (1 << count)) != 0) {
+ if ((env->features[esa->feature] & esa->bits) == esa->bits
+ && ((ena_mask >> count) & 1) != 0) {
*eax = esa->size;
*ebx = esa->offset;
}
@@ -156,6 +156,7 @@
#define HF_OSFXSR_SHIFT 22 /* CR4.OSFXSR */
#define HF_SMAP_SHIFT 23 /* CR4.SMAP */
#define HF_IOBPT_SHIFT 24 /* an io breakpoint enabled */
+#define HF_OSXSAVE_SHIFT 25 /* CR4.OSXSAVE */
#define HF_CPL_MASK (3 << HF_CPL_SHIFT)
#define HF_SOFTMMU_MASK (1 << HF_SOFTMMU_SHIFT)
@@ -180,6 +181,7 @@
#define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT)
#define HF_SMAP_MASK (1 << HF_SMAP_SHIFT)
#define HF_IOBPT_MASK (1 << HF_IOBPT_SHIFT)
+#define HF_OSXSAVE_MASK (1 << HF_OSXSAVE_SHIFT)
/* hflags2 */
@@ -1189,6 +1189,39 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr)
}
}
+/* Return the XINUSE bitmap for ENV.  XINUSE is not tracked; it could be
+   calculated here, but it is probably less work to simply indicate that
+   all components are in use, so every bit is returned set.  */
+static uint64_t get_xinuse(CPUX86State *env)
+{
+    return ~(uint64_t)0;
+}
+
+/* XSAVE helper: store the x87 and/or SSE state selected by RFBM (after
+   masking with XCR0) into the save area at PTR, then merge the saved
+   components into the 64-bit XSTATE_BV word at PTR + 512.
+   Raises #GP(0) when PTR is not 64-byte aligned.  */
+void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+{
+    const uintptr_t ra = GETPC();
+    uint64_t xstate_bv;
+
+    /* The operand must be 64 byte aligned.  */
+    if ((ptr & 63) != 0) {
+        raise_exception_err_ra(env, EXCP0D_GPF, 0, ra);
+    }
+
+    /* Components that XCR0 does not enable are never saved.  */
+    rfbm &= env->xcr0;
+
+    if ((rfbm & XSTATE_FP) != 0) {
+        do_xsave_fpu(env, ptr, ra);
+    }
+    if ((rfbm & XSTATE_SSE) != 0) {
+        do_xsave_mxcsr(env, ptr, ra);
+        do_xsave_sse(env, ptr, ra);
+    }
+
+    /* Rewrite only the XSTATE_BV bits covered by RFBM; the remaining
+       bits keep the value already present in memory.  */
+    xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
+    xstate_bv &= ~rfbm;
+    xstate_bv |= get_xinuse(env) & rfbm;
+    cpu_stq_data_ra(env, ptr + 512, xstate_bv, ra);
+}
+
static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
{
int i, fpus, fptag;
@@ -1256,6 +1289,96 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr)
}
}
+/* XRSTOR helper (standard form only): reload the x87 and/or SSE state
+   selected by RFBM (after masking with XCR0) from the save area at PTR.
+   A selected component whose XSTATE_BV bit is clear is reset instead of
+   loaded.  Raises #GP(0) for a misaligned PTR, for XSTATE_BV bits that
+   are not set in XCR0 (including bit 63), or for a non-zero value in
+   the two 64-bit words at PTR + 520 and PTR + 528.  */
+void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+{
+    const uintptr_t ra = GETPC();
+    uint64_t xstate_bv, hdr_lo, hdr_hi;
+
+    rfbm &= env->xcr0;
+
+    /* The operand must be 64 byte aligned.  */
+    if ((ptr & 63) != 0) {
+        raise_exception_err_ra(env, EXCP0D_GPF, 0, ra);
+    }
+
+    xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
+
+    /* FIXME: Compact form.  Bit 63 set is rejected for now.  */
+    if ((xstate_bv >> 63) != 0) {
+        raise_exception_err_ra(env, EXCP0D_GPF, 0, ra);
+    }
+
+    /* Standard form.  */
+
+    /* XSTATE_BV must not set bits that are not present in XCR0.  */
+    if ((xstate_bv & ~env->xcr0) != 0) {
+        raise_exception_err_ra(env, EXCP0D_GPF, 0, ra);
+    }
+
+    /* The 16 header bytes following XSTATE_BV must be zero.  Both
+       words are loaded before either is tested.  */
+    hdr_lo = cpu_ldq_data_ra(env, ptr + 520, ra);
+    hdr_hi = cpu_ldq_data_ra(env, ptr + 528, ra);
+    if ((hdr_lo | hdr_hi) != 0) {
+        raise_exception_err_ra(env, EXCP0D_GPF, 0, ra);
+    }
+
+    if ((rfbm & XSTATE_FP) != 0) {
+        if ((xstate_bv & XSTATE_FP) != 0) {
+            do_xrstor_fpu(env, ptr, ra);
+        } else {
+            /* Component not present in the image: reset the FPU.  */
+            helper_fninit(env);
+            memset(env->fpregs, 0, sizeof(env->fpregs));
+        }
+    }
+    if ((rfbm & XSTATE_SSE) != 0) {
+        /* Note that the standard form of XRSTOR loads MXCSR from memory
+           whether or not the XSTATE_BV bit is set.  */
+        do_xrstor_mxcsr(env, ptr, ra);
+        if ((xstate_bv & XSTATE_SSE) != 0) {
+            do_xrstor_sse(env, ptr, ra);
+        } else {
+            /* ??? When AVX is implemented, we may have to be more
+               selective in the clearing.  */
+            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
+        }
+    }
+}
+
+/* XGETBV helper: return the extended control register selected by ECX.
+   ECX = 0 yields XCR0.  ECX = 1 yields XCR0 masked by XINUSE, but only
+   when the guest CPU advertises XGETBV1 in CPUID.(EAX=0DH,ECX=1):EAX.
+   Any other selector, or ECX = 1 without that feature, raises #GP(0).  */
+uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
+{
+    switch (ecx) {
+    case 0:
+        return env->xcr0;
+    case 1:
+        /* Only valid when the XGETBV1 feature bit is exposed.  */
+        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
+            return env->xcr0 & get_xinuse(env);
+        }
+        break;
+    default:
+        break;
+    }
+    raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
+}
+
+/* XSETBV helper: write MASK to the extended control register selected
+   by ECX.  Only XCR0 (ECX = 0) is accepted, MASK must keep XSTATE_FP
+   set, and MASK may not contain bits outside the EDX:EAX value that
+   cpu_x86_cpuid() reports for leaf 0xD, sub-leaf 0.  Any violation
+   raises #GP(0) and leaves XCR0 unchanged.  */
+void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
+{
+    const uintptr_t ra = GETPC();
+    uint32_t unused, lo, hi;
+    uint64_t supported;
+
+    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
+    if (ecx != 0 || (mask & XSTATE_FP) == 0) {
+        raise_exception_err_ra(env, EXCP0D_GPF, 0, ra);
+    }
+
+    /* Disallow enabling unimplemented features.  */
+    cpu_x86_cpuid(env, 0x0d, 0, &lo, &unused, &unused, &hi);
+    supported = lo | ((uint64_t)hi << 32);
+    if ((mask & ~supported) != 0) {
+        raise_exception_err_ra(env, EXCP0D_GPF, 0, ra);
+    }
+
+    env->xcr0 = mask;
+}
+
void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
{
CPU_LDoubleU temp;
@@ -647,6 +647,7 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
{
X86CPU *cpu = x86_env_get_cpu(env);
+ uint32_t hflags;
#if defined(DEBUG_MMU)
printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
@@ -656,24 +657,44 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
CR4_SMEP_MASK | CR4_SMAP_MASK)) {
tlb_flush(CPU(cpu), 1);
}
+
+ /* Clear bits we're going to recompute. */
+ hflags = env->hflags & ~(HF_OSFXSR_MASK | HF_OSXSAVE_MASK | HF_SMAP_MASK);
+
/* SSE handling */
if (!(env->features[FEAT_1_EDX] & CPUID_SSE)) {
- new_cr4 &= ~CR4_OSFXSR_MASK;
+ if (new_cr4 & CR4_OSFXSR_MASK) {
+ goto do_gpf;
+ }
}
- env->hflags &= ~HF_OSFXSR_MASK;
if (new_cr4 & CR4_OSFXSR_MASK) {
- env->hflags |= HF_OSFXSR_MASK;
+ hflags |= HF_OSFXSR_MASK;
+ }
+
+ if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
+ if (new_cr4 & CR4_OSXSAVE_MASK) {
+ goto do_gpf;
+ }
+ }
+ if (new_cr4 & CR4_OSXSAVE_MASK) {
+ hflags |= HF_OSXSAVE_MASK;
}
if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SMAP)) {
- new_cr4 &= ~CR4_SMAP_MASK;
+ if (new_cr4 & CR4_SMAP_MASK) {
+ goto do_gpf;
+ }
}
- env->hflags &= ~HF_SMAP_MASK;
if (new_cr4 & CR4_SMAP_MASK) {
- env->hflags |= HF_SMAP_MASK;
+ hflags |= HF_SMAP_MASK;
}
env->cr[4] = new_cr4;
+ env->hflags = hflags;
+ return;
+
+do_gpf:
+ raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
}
#if defined(CONFIG_USER_ONLY)
@@ -187,6 +187,10 @@ DEF_HELPER_3(fsave, void, env, tl, int)
DEF_HELPER_3(frstor, void, env, tl, int)
DEF_HELPER_FLAGS_2(fxsave, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_2(fxrstor, TCG_CALL_NO_WG, void, env, tl)
+DEF_HELPER_FLAGS_3(xsave, TCG_CALL_NO_WG, void, env, tl, i64)
+DEF_HELPER_FLAGS_3(xrstor, TCG_CALL_NO_WG, void, env, tl, i64)
+DEF_HELPER_FLAGS_2(xgetbv, TCG_CALL_NO_WG, i64, env, i32)
+DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64)
DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
@@ -1852,16 +1852,22 @@ static int kvm_get_sregs(X86CPU *cpu)
#define HFLAG_COPY_MASK \
~( HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
- HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
+ HF_OSFXSR_MASK | HF_OSXSAVE_MASK | HF_LMA_MASK | HF_CS32_MASK | \
HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
- hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+ hflags = env->hflags & HFLAG_COPY_MASK;
+ hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
- hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
- (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ hflags |= HF_OSFXSR_MASK;
+ }
+ if (env->cr[4] & CR4_OSXSAVE_MASK) {
+ hflags |= HF_OSXSAVE_MASK;
+ }
if (env->efer & MSR_EFER_LMA) {
hflags |= HF_LMA_MASK;
@@ -1882,7 +1888,7 @@ static int kvm_get_sregs(X86CPU *cpu)
env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
}
}
- env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
+ env->hflags = hflags;
return 0;
}
@@ -7079,6 +7079,42 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
break;
+ case 0xd0: /* xgetbv */
+ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+ || (s->flags & HF_OSXSAVE_MASK) == 0
+ || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+ | PREFIX_REPZ | PREFIX_REPNZ))) {
+ goto illegal_op;
+ }
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+ gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+ tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+ break;
+
+ case 0xd1: /* xsetbv */
+ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+ || (s->flags & HF_OSXSAVE_MASK) == 0
+ || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+ | PREFIX_REPZ | PREFIX_REPNZ))) {
+ goto illegal_op;
+ }
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ }
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+ cpu_regs[R_EDX]);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+ gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+ /* End TB because translation flags may change. */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ break;
+
case 0xd8: /* VMRUN */
if (!(s->flags & HF_SVME_MASK) || !s->pe) {
goto illegal_op;
@@ -7567,10 +7603,44 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
}
break;
- case 5: /* lfence */
+    case 4: /* xsave */
+        /* XSAVE takes a memory operand only (mod != 3), needs both the
+           CPUID XSAVE feature and CR4.OSXSAVE (mirrored in
+           HF_OSXSAVE_MASK), and rejects LOCK, operand-size and REP
+           prefixes.  EDX:EAX is passed to the helper as the
+           requested-feature bitmap.  */
+        if (mod == 3
+            || (s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+            || (s->flags & HF_OSXSAVE_MASK) == 0
+            || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+                             | PREFIX_REPZ | PREFIX_REPNZ))) {
+            goto illegal_op;
+        }
+        gen_lea_modrm(env, s, modrm);
+        tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                              cpu_regs[R_EDX]);
+        gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
+        break;
-        if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2))
-            goto illegal_op;
-        break;
+ case 5:
+ if (mod == 3) {
+ /* lfence */
+ if (!(s->cpuid_features & CPUID_SSE2)
+ || (s->prefix & PREFIX_LOCK)) {
+ goto illegal_op;
+ }
+ /* no-op */
+ } else {
+ /* xrstor */
+ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+ || (s->flags & HF_OSXSAVE_MASK) == 0
+ || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+ | PREFIX_REPZ | PREFIX_REPNZ))) {
+ goto illegal_op;
+ }
+ gen_lea_modrm(env, s, modrm);
+ tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+ cpu_regs[R_EDX]);
+ gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
+ }
+ break;
case 6: /* mfence/clwb */
if (s->prefix & PREFIX_DATA) {
/* clwb */