Message ID | 09fe2c18-0037-af71-93be-87261051e2a2@suse.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | x86emul: further work | expand |
On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote: > While the Intel SDM claims that FRSTOR itself may raise #MF upon > completion, this was confirmed by Intel to be a doc error which will be > corrected in due course; behavior is like FLDENV, and like old hard copy > manuals describe it. Otherwise we'd have to emulate the insn by filling > st(N) in suitable order, followed by FLDENV. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > v7: New. > > --- a/tools/tests/x86_emulator/test_x86_emulator.c > +++ b/tools/tests/x86_emulator/test_x86_emulator.c > @@ -2442,6 +2442,27 @@ int main(int argc, char **argv) > else > printf("skipped\n"); > > + printf("%-40s", "Testing fldenv 8(%edx)..."); Likely a stupid question, but why the added 8? edx will contain the memory address used to save the sate by fnstenv, so I would expect fldenv to just load from there? > + if ( stack_exec && cpu_has_fpu ) > + { > + asm volatile ( "fnstenv %0\n\t" > + "fninit" > + : "=m" (res[2]) :: "memory" ); Why do you need the memory clobber here? I assume it's because res is of type unsigned int and hence doesn't have the right size that fnstenv will actually write to? > + zap_fpsel(&res[2], true); > + instr[0] = 0xd9; instr[1] = 0x62; instr[2] = 0x08; > + regs.eip = (unsigned long)&instr[0]; > + regs.edx = (unsigned long)res; > + rc = x86_emulate(&ctxt, &emulops); > + asm volatile ( "fnstenv %0" : "=m" (res[9]) :: "memory" ); > + if ( (rc != X86EMUL_OKAY) || > + memcmp(res + 2, res + 9, 28) || > + (regs.eip != (unsigned long)&instr[3]) ) > + goto fail; > + printf("okay\n"); > + } > + else > + printf("skipped\n"); > + > printf("%-40s", "Testing 16-bit fnsave (%ecx)..."); > if ( stack_exec && cpu_has_fpu ) > { > @@ -2468,6 +2489,31 @@ int main(int argc, char **argv) > goto fail; > printf("okay\n"); > } > + else > + printf("skipped\n"); > + > + printf("%-40s", "Testing frstor (%edx)..."); > + if ( stack_exec && cpu_has_fpu ) > + { > + const uint16_t seven = 7; > + > + asm volatile ( "fninit\n\t" > + "fld1\n\t" > + "fidivs %1\n\t" > + "fnsave %0\n\t" > + : "=&m" (res[0]) : "m" (seven) : "memory" ); > + zap_fpsel(&res[0], true); > + instr[0] = 0xdd; instr[1] = 0x22; > + regs.eip = (unsigned long)&instr[0]; > + regs.edx = (unsigned long)res; > + rc = x86_emulate(&ctxt, &emulops); > + asm volatile ( "fnsave %0" : "=m" (res[27]) :: "memory" ); > + if ( (rc != X86EMUL_OKAY) || > + memcmp(res, res + 27, 108) || > + (regs.eip != (unsigned long)&instr[2]) ) > + goto fail; > + printf("okay\n"); > + } > else > printf("skipped\n"); > > --- a/xen/arch/x86/x86_emulate/x86_emulate.c > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c > @@ -857,6 +857,7 @@ struct x86_emulate_state { > blk_NONE, > blk_enqcmd, > #ifndef X86EMUL_NO_FPU > + blk_fld, /* FLDENV, FRSTOR */ > blk_fst, /* FNSTENV, FNSAVE */ > #endif > blk_movdir, > @@ -4948,21 +4949,14 @@ x86_emulate( > dst.bytes = 4; > emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val); > break; > - case 4: /* fldenv - TODO */ > - state->fpu_ctrl = true; > - goto unimplemented_insn; > - case 5: /* fldcw m2byte */ > - state->fpu_ctrl = true; > - fpu_memsrc16: > - if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val, > - 2, ctxt)) != X86EMUL_OKAY ) > - goto done; > - emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val); > - break; > + case 4: /* fldenv */ > + /* Raise #MF now if there are pending unmasked exceptions. */ > + emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */); Maybe it would make sense to have a wrapper for fnop? > + /* fall through */ > case 6: /* fnstenv */ > fail_if(!ops->blk); > - state->blk = blk_fst; > - /* REX is meaningless for this insn by this point. */ > + state->blk = modrm_reg & 2 ? blk_fst : blk_fld; > + /* REX is meaningless for these insns by this point. */ > rex_prefix = in_protmode(ctxt, ops); > if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL, > op_bytes > 2 ? sizeof(struct x87_env32) > @@ -4972,6 +4966,14 @@ x86_emulate( > goto done; > state->fpu_ctrl = true; > break; > + case 5: /* fldcw m2byte */ > + state->fpu_ctrl = true; > + fpu_memsrc16: > + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val, > + 2, ctxt)) != X86EMUL_OKAY ) > + goto done; > + emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val); > + break; > case 7: /* fnstcw m2byte */ > state->fpu_ctrl = true; > fpu_memdst16: > @@ -5124,13 +5126,14 @@ x86_emulate( > dst.bytes = 8; > emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val); > break; > - case 4: /* frstor - TODO */ > - state->fpu_ctrl = true; > - goto unimplemented_insn; > + case 4: /* frstor */ > + /* Raise #MF now if there are pending unmasked exceptions. */ > + emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */); > + /* fall through */ > case 6: /* fnsave */ > fail_if(!ops->blk); > - state->blk = blk_fst; > - /* REX is meaningless for this insn by this point. */ > + state->blk = modrm_reg & 2 ? blk_fst : blk_fld; > + /* REX is meaningless for these insns by this point. */ > rex_prefix = in_protmode(ctxt, ops); > if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL, > op_bytes > 2 ? sizeof(struct x87_env32) + 80 > @@ -11648,6 +11651,89 @@ int x86_emul_blk( > > #ifndef X86EMUL_NO_FPU > > + case blk_fld: > + ASSERT(!data); > + > + /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */ > + switch ( bytes ) > + { > + case sizeof(fpstate.env): > + case sizeof(fpstate): > + memcpy(&fpstate.env, ptr, sizeof(fpstate.env)); > + if ( !state->rex_prefix ) > + { > + unsigned int fip = fpstate.env.mode.real.fip_lo + > + (fpstate.env.mode.real.fip_hi << 16); > + unsigned int fdp = fpstate.env.mode.real.fdp_lo + > + (fpstate.env.mode.real.fdp_hi << 16); > + unsigned int fop = fpstate.env.mode.real.fop; > + > + fpstate.env.mode.prot.fip = fip & 0xf; > + fpstate.env.mode.prot.fcs = fip >> 4; > + fpstate.env.mode.prot.fop = fop; > + fpstate.env.mode.prot.fdp = fdp & 0xf; > + fpstate.env.mode.prot.fds = fdp >> 4; I've found the layouts in the SDM vol. 1, but I haven't been able to found the translation mechanism from real to protected. Could you maybe add a reference here? > + } > + > + if ( bytes == sizeof(fpstate.env) ) > + ptr = NULL; > + else > + ptr += sizeof(fpstate.env); > + break; > + > + case sizeof(struct x87_env16): > + case sizeof(struct x87_env16) + sizeof(fpstate.freg): > + { > + const struct x87_env16 *env = ptr; > + > + fpstate.env.fcw = env->fcw; > + fpstate.env.fsw = env->fsw; > + fpstate.env.ftw = env->ftw; > + > + if ( state->rex_prefix ) > + { > + fpstate.env.mode.prot.fip = env->mode.prot.fip; > + fpstate.env.mode.prot.fcs = env->mode.prot.fcs; > + fpstate.env.mode.prot.fdp = env->mode.prot.fdp; > + fpstate.env.mode.prot.fds = env->mode.prot.fds; > + fpstate.env.mode.prot.fop = 0; /* unknown */ > + } > + else > + { > + unsigned int fip = env->mode.real.fip_lo + > + (env->mode.real.fip_hi << 16); > + unsigned int fdp = env->mode.real.fdp_lo + > + (env->mode.real.fdp_hi << 16); > + unsigned int fop = env->mode.real.fop; > + > + fpstate.env.mode.prot.fip = fip & 0xf; > + fpstate.env.mode.prot.fcs = fip >> 4; > + fpstate.env.mode.prot.fop = fop; > + fpstate.env.mode.prot.fdp = fdp & 0xf; > + fpstate.env.mode.prot.fds = fdp >> 4; This looks mostly the same as the translation done above, so maybe could be abstracted anyway in a macro to avoid the code repetition? (ie: fpstate_real_to_prot(src, dst) or some such). Thanks, Roger.
On 08.05.2020 15:37, Roger Pau Monné wrote: > On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote: >> --- a/tools/tests/x86_emulator/test_x86_emulator.c >> +++ b/tools/tests/x86_emulator/test_x86_emulator.c >> @@ -2442,6 +2442,27 @@ int main(int argc, char **argv) >> else >> printf("skipped\n"); >> >> + printf("%-40s", "Testing fldenv 8(%edx)..."); > > Likely a stupid question, but why the added 8? edx will contain the > memory address used to save the sate by fnstenv, so I would expect > fldenv to just load from there? The 8 is just to vary ModR/M encodings acrosss the various tests - it's an arbitrary choice. >> + if ( stack_exec && cpu_has_fpu ) >> + { >> + asm volatile ( "fnstenv %0\n\t" >> + "fninit" >> + : "=m" (res[2]) :: "memory" ); > > Why do you need the memory clobber here? I assume it's because res is > of type unsigned int and hence doesn't have the right size that > fnstenv will actually write to? Correct. >> @@ -4948,21 +4949,14 @@ x86_emulate( >> dst.bytes = 4; >> emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val); >> break; >> - case 4: /* fldenv - TODO */ >> - state->fpu_ctrl = true; >> - goto unimplemented_insn; >> - case 5: /* fldcw m2byte */ >> - state->fpu_ctrl = true; >> - fpu_memsrc16: >> - if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val, >> - 2, ctxt)) != X86EMUL_OKAY ) >> - goto done; >> - emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val); >> - break; >> + case 4: /* fldenv */ >> + /* Raise #MF now if there are pending unmasked exceptions. */ >> + emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */); > > Maybe it would make sense to have a wrapper for fnop? I'm not convinced that would be worth it. >> @@ -11648,6 +11651,89 @@ int x86_emul_blk( >> >> #ifndef X86EMUL_NO_FPU >> >> + case blk_fld: >> + ASSERT(!data); >> + >> + /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */ >> + switch ( bytes ) >> + { >> + case sizeof(fpstate.env): >> + case sizeof(fpstate): >> + memcpy(&fpstate.env, ptr, sizeof(fpstate.env)); >> + if ( !state->rex_prefix ) >> + { >> + unsigned int fip = fpstate.env.mode.real.fip_lo + >> + (fpstate.env.mode.real.fip_hi << 16); >> + unsigned int fdp = fpstate.env.mode.real.fdp_lo + >> + (fpstate.env.mode.real.fdp_hi << 16); >> + unsigned int fop = fpstate.env.mode.real.fop; >> + >> + fpstate.env.mode.prot.fip = fip & 0xf; >> + fpstate.env.mode.prot.fcs = fip >> 4; >> + fpstate.env.mode.prot.fop = fop; >> + fpstate.env.mode.prot.fdp = fdp & 0xf; >> + fpstate.env.mode.prot.fds = fdp >> 4; > > I've found the layouts in the SDM vol. 1, but I haven't been able to > found the translation mechanism from real to protected. Could you > maybe add a reference here? A reference to some piece of documentation? I don't think this is spelled out anywhere. It's also only one of various possible ways of doing the translation, but among them the most flexible one for possible consumers of the data (because of using the smallest possible offsets into the segments). >> + } >> + >> + if ( bytes == sizeof(fpstate.env) ) >> + ptr = NULL; >> + else >> + ptr += sizeof(fpstate.env); >> + break; >> + >> + case sizeof(struct x87_env16): >> + case sizeof(struct x87_env16) + sizeof(fpstate.freg): >> + { >> + const struct x87_env16 *env = ptr; >> + >> + fpstate.env.fcw = env->fcw; >> + fpstate.env.fsw = env->fsw; >> + fpstate.env.ftw = env->ftw; >> + >> + if ( state->rex_prefix ) >> + { >> + fpstate.env.mode.prot.fip = env->mode.prot.fip; >> + fpstate.env.mode.prot.fcs = env->mode.prot.fcs; >> + fpstate.env.mode.prot.fdp = env->mode.prot.fdp; >> + fpstate.env.mode.prot.fds = env->mode.prot.fds; >> + fpstate.env.mode.prot.fop = 0; /* unknown */ >> + } >> + else >> + { >> + unsigned int fip = env->mode.real.fip_lo + >> + (env->mode.real.fip_hi << 16); >> + unsigned int fdp = env->mode.real.fdp_lo + >> + (env->mode.real.fdp_hi << 16); >> + unsigned int fop = env->mode.real.fop; >> + >> + fpstate.env.mode.prot.fip = fip & 0xf; >> + fpstate.env.mode.prot.fcs = fip >> 4; >> + fpstate.env.mode.prot.fop = fop; >> + fpstate.env.mode.prot.fdp = fdp & 0xf; >> + fpstate.env.mode.prot.fds = fdp >> 4; > > This looks mostly the same as the translation done above, so maybe > could be abstracted anyway in a macro to avoid the code repetition? > (ie: fpstate_real_to_prot(src, dst) or some such). Just the 5 assignments could be put in an inline function, but if we also wanted to abstract away the declarations with their initializers, it would need to be a macro because of the different types of fpstate.env and *env. While I'd generally prefer inline functions, the macro would have the benefit that it could be #define-d / #undef-d right inside this case block. Thoughts? Jan
On Fri, May 08, 2020 at 05:04:02PM +0200, Jan Beulich wrote: > On 08.05.2020 15:37, Roger Pau Monné wrote: > > On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote: > >> --- a/tools/tests/x86_emulator/test_x86_emulator.c > >> +++ b/tools/tests/x86_emulator/test_x86_emulator.c > >> @@ -11648,6 +11651,89 @@ int x86_emul_blk( > >> > >> #ifndef X86EMUL_NO_FPU > >> > >> + case blk_fld: > >> + ASSERT(!data); > >> + > >> + /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */ > >> + switch ( bytes ) > >> + { > >> + case sizeof(fpstate.env): > >> + case sizeof(fpstate): > >> + memcpy(&fpstate.env, ptr, sizeof(fpstate.env)); > >> + if ( !state->rex_prefix ) > >> + { > >> + unsigned int fip = fpstate.env.mode.real.fip_lo + > >> + (fpstate.env.mode.real.fip_hi << 16); > >> + unsigned int fdp = fpstate.env.mode.real.fdp_lo + > >> + (fpstate.env.mode.real.fdp_hi << 16); > >> + unsigned int fop = fpstate.env.mode.real.fop; > >> + > >> + fpstate.env.mode.prot.fip = fip & 0xf; > >> + fpstate.env.mode.prot.fcs = fip >> 4; > >> + fpstate.env.mode.prot.fop = fop; > >> + fpstate.env.mode.prot.fdp = fdp & 0xf; > >> + fpstate.env.mode.prot.fds = fdp >> 4; > > > > I've found the layouts in the SDM vol. 1, but I haven't been able to > > found the translation mechanism from real to protected. Could you > > maybe add a reference here? > > A reference to some piece of documentation? I don't think this > is spelled out anywhere. It's also only one of various possible > ways of doing the translation, but among them the most flexible > one for possible consumers of the data (because of using the > smallest possible offsets into the segments). Having this written down as a comment would help, but maybe that's just because I'm not familiar at all with all this stuff. Again, likely a very stupid question, but I would expect: fpstate.env.mode.prot.fip = fip; Without the mask. > >> + } > >> + > >> + if ( bytes == sizeof(fpstate.env) ) > >> + ptr = NULL; > >> + else > >> + ptr += sizeof(fpstate.env); > >> + break; > >> + > >> + case sizeof(struct x87_env16): > >> + case sizeof(struct x87_env16) + sizeof(fpstate.freg): > >> + { > >> + const struct x87_env16 *env = ptr; > >> + > >> + fpstate.env.fcw = env->fcw; > >> + fpstate.env.fsw = env->fsw; > >> + fpstate.env.ftw = env->ftw; > >> + > >> + if ( state->rex_prefix ) > >> + { > >> + fpstate.env.mode.prot.fip = env->mode.prot.fip; > >> + fpstate.env.mode.prot.fcs = env->mode.prot.fcs; > >> + fpstate.env.mode.prot.fdp = env->mode.prot.fdp; > >> + fpstate.env.mode.prot.fds = env->mode.prot.fds; > >> + fpstate.env.mode.prot.fop = 0; /* unknown */ > >> + } > >> + else > >> + { > >> + unsigned int fip = env->mode.real.fip_lo + > >> + (env->mode.real.fip_hi << 16); > >> + unsigned int fdp = env->mode.real.fdp_lo + > >> + (env->mode.real.fdp_hi << 16); > >> + unsigned int fop = env->mode.real.fop; > >> + > >> + fpstate.env.mode.prot.fip = fip & 0xf; > >> + fpstate.env.mode.prot.fcs = fip >> 4; > >> + fpstate.env.mode.prot.fop = fop; > >> + fpstate.env.mode.prot.fdp = fdp & 0xf; > >> + fpstate.env.mode.prot.fds = fdp >> 4; > > > > This looks mostly the same as the translation done above, so maybe > > could be abstracted anyway in a macro to avoid the code repetition? > > (ie: fpstate_real_to_prot(src, dst) or some such). > > Just the 5 assignments could be put in an inline function, but > if we also wanted to abstract away the declarations with their > initializers, it would need to be a macro because of the > different types of fpstate.env and *env. While I'd generally > prefer inline functions, the macro would have the benefit that > it could be #define-d / #undef-d right inside this case block. > Thoughts? I think a macro would be fine IMO. Thanks, Roger.
On 05/05/2020 09:16, Jan Beulich wrote: > While the Intel SDM claims that FRSTOR itself may raise #MF upon > completion, this was confirmed by Intel to be a doc error which will be > corrected in due course; behavior is like FLDENV, and like old hard copy > manuals describe it. Otherwise we'd have to emulate the insn by filling > st(N) in suitable order, followed by FLDENV. I wouldn't bother calling this out at all. We know the doc is going to be corrected in the next revision, and "what we would have done if the docs error was in fact accurate" only adds confusion to an already complicated change. > --- a/xen/arch/x86/x86_emulate/x86_emulate.c > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c > @@ -857,6 +857,7 @@ struct x86_emulate_state { > blk_NONE, > blk_enqcmd, > #ifndef X86EMUL_NO_FPU > + blk_fld, /* FLDENV, FRSTOR */ > blk_fst, /* FNSTENV, FNSAVE */ > #endif > blk_movdir, > @@ -4948,21 +4949,14 @@ x86_emulate( > dst.bytes = 4; > emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val); > break; > - case 4: /* fldenv - TODO */ > - state->fpu_ctrl = true; > - goto unimplemented_insn; > - case 5: /* fldcw m2byte */ > - state->fpu_ctrl = true; > - fpu_memsrc16: > - if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val, > - 2, ctxt)) != X86EMUL_OKAY ) > - goto done; > - emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val); > - break; The commit message should however note that the larger-than-expected diff is purely code motion for case 5, to arrange fldenv to fall though into fstenv. Alternatively, could be pulled out into an earlier patch. > + case 4: /* fldenv */ > + /* Raise #MF now if there are pending unmasked exceptions. */ > + emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */); > + /* fall through */ > case 6: /* fnstenv */ > fail_if(!ops->blk); > - state->blk = blk_fst; > - /* REX is meaningless for this insn by this point. */ > + state->blk = modrm_reg & 2 ? blk_fst : blk_fld; > + /* REX is meaningless for these insns by this point. */ > rex_prefix = in_protmode(ctxt, ops); > if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL, > op_bytes > 2 ? sizeof(struct x87_env32) > @@ -4972,6 +4966,14 @@ x86_emulate( > goto done; > state->fpu_ctrl = true; > break; > + case 5: /* fldcw m2byte */ > + state->fpu_ctrl = true; > + fpu_memsrc16: > + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val, > + 2, ctxt)) != X86EMUL_OKAY ) > + goto done; > + emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val); > + break; > case 7: /* fnstcw m2byte */ > state->fpu_ctrl = true; > fpu_memdst16: > @@ -5124,13 +5126,14 @@ x86_emulate( > dst.bytes = 8; > emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val); > break; > - case 4: /* frstor - TODO */ > - state->fpu_ctrl = true; > - goto unimplemented_insn; > + case 4: /* frstor */ > + /* Raise #MF now if there are pending unmasked exceptions. */ > + emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */); > + /* fall through */ > case 6: /* fnsave */ > fail_if(!ops->blk); > - state->blk = blk_fst; > - /* REX is meaningless for this insn by this point. */ > + state->blk = modrm_reg & 2 ? blk_fst : blk_fld; > + /* REX is meaningless for these insns by this point. */ > rex_prefix = in_protmode(ctxt, ops); > if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL, > op_bytes > 2 ? sizeof(struct x87_env32) + 80 > @@ -11648,6 +11651,89 @@ int x86_emul_blk( > > #ifndef X86EMUL_NO_FPU > > + case blk_fld: > + ASSERT(!data); > + > + /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */ > + switch ( bytes ) > + { > + case sizeof(fpstate.env): > + case sizeof(fpstate): > + memcpy(&fpstate.env, ptr, sizeof(fpstate.env)); > + if ( !state->rex_prefix ) > + { > + unsigned int fip = fpstate.env.mode.real.fip_lo + > + (fpstate.env.mode.real.fip_hi << 16); > + unsigned int fdp = fpstate.env.mode.real.fdp_lo + > + (fpstate.env.mode.real.fdp_hi << 16); > + unsigned int fop = fpstate.env.mode.real.fop; > + > + fpstate.env.mode.prot.fip = fip & 0xf; > + fpstate.env.mode.prot.fcs = fip >> 4; > + fpstate.env.mode.prot.fop = fop; > + fpstate.env.mode.prot.fdp = fdp & 0xf; > + fpstate.env.mode.prot.fds = fdp >> 4; The same comments about comments apply here from the previous patch. ~Andrew
On 08/05/2020 16:04, Jan Beulich wrote: >>> + } >>> + >>> + if ( bytes == sizeof(fpstate.env) ) >>> + ptr = NULL; >>> + else >>> + ptr += sizeof(fpstate.env); >>> + break; >>> + >>> + case sizeof(struct x87_env16): >>> + case sizeof(struct x87_env16) + sizeof(fpstate.freg): >>> + { >>> + const struct x87_env16 *env = ptr; >>> + >>> + fpstate.env.fcw = env->fcw; >>> + fpstate.env.fsw = env->fsw; >>> + fpstate.env.ftw = env->ftw; >>> + >>> + if ( state->rex_prefix ) >>> + { >>> + fpstate.env.mode.prot.fip = env->mode.prot.fip; >>> + fpstate.env.mode.prot.fcs = env->mode.prot.fcs; >>> + fpstate.env.mode.prot.fdp = env->mode.prot.fdp; >>> + fpstate.env.mode.prot.fds = env->mode.prot.fds; >>> + fpstate.env.mode.prot.fop = 0; /* unknown */ >>> + } >>> + else >>> + { >>> + unsigned int fip = env->mode.real.fip_lo + >>> + (env->mode.real.fip_hi << 16); >>> + unsigned int fdp = env->mode.real.fdp_lo + >>> + (env->mode.real.fdp_hi << 16); >>> + unsigned int fop = env->mode.real.fop; >>> + >>> + fpstate.env.mode.prot.fip = fip & 0xf; >>> + fpstate.env.mode.prot.fcs = fip >> 4; >>> + fpstate.env.mode.prot.fop = fop; >>> + fpstate.env.mode.prot.fdp = fdp & 0xf; >>> + fpstate.env.mode.prot.fds = fdp >> 4; >> This looks mostly the same as the translation done above, so maybe >> could be abstracted anyway in a macro to avoid the code repetition? >> (ie: fpstate_real_to_prot(src, dst) or some such). > Just the 5 assignments could be put in an inline function, but > if we also wanted to abstract away the declarations with their > initializers, it would need to be a macro because of the > different types of fpstate.env and *env. While I'd generally > prefer inline functions, the macro would have the benefit that > it could be #define-d / #undef-d right inside this case block. > Thoughts? Code like this is large in terms of volume, but it is completely crystal clear (with the requested comments in place) and easy to follow. I don't see how attempting to abstract out two small portions is going to be an improvement. ~Andrew
On 08.05.2020 20:29, Andrew Cooper wrote: > On 08/05/2020 16:04, Jan Beulich wrote: >>>> + } >>>> + >>>> + if ( bytes == sizeof(fpstate.env) ) >>>> + ptr = NULL; >>>> + else >>>> + ptr += sizeof(fpstate.env); >>>> + break; >>>> + >>>> + case sizeof(struct x87_env16): >>>> + case sizeof(struct x87_env16) + sizeof(fpstate.freg): >>>> + { >>>> + const struct x87_env16 *env = ptr; >>>> + >>>> + fpstate.env.fcw = env->fcw; >>>> + fpstate.env.fsw = env->fsw; >>>> + fpstate.env.ftw = env->ftw; >>>> + >>>> + if ( state->rex_prefix ) >>>> + { >>>> + fpstate.env.mode.prot.fip = env->mode.prot.fip; >>>> + fpstate.env.mode.prot.fcs = env->mode.prot.fcs; >>>> + fpstate.env.mode.prot.fdp = env->mode.prot.fdp; >>>> + fpstate.env.mode.prot.fds = env->mode.prot.fds; >>>> + fpstate.env.mode.prot.fop = 0; /* unknown */ >>>> + } >>>> + else >>>> + { >>>> + unsigned int fip = env->mode.real.fip_lo + >>>> + (env->mode.real.fip_hi << 16); >>>> + unsigned int fdp = env->mode.real.fdp_lo + >>>> + (env->mode.real.fdp_hi << 16); >>>> + unsigned int fop = env->mode.real.fop; >>>> + >>>> + fpstate.env.mode.prot.fip = fip & 0xf; >>>> + fpstate.env.mode.prot.fcs = fip >> 4; >>>> + fpstate.env.mode.prot.fop = fop; >>>> + fpstate.env.mode.prot.fdp = fdp & 0xf; >>>> + fpstate.env.mode.prot.fds = fdp >> 4; >>> This looks mostly the same as the translation done above, so maybe >>> could be abstracted anyway in a macro to avoid the code repetition? >>> (ie: fpstate_real_to_prot(src, dst) or some such). >> Just the 5 assignments could be put in an inline function, but >> if we also wanted to abstract away the declarations with their >> initializers, it would need to be a macro because of the >> different types of fpstate.env and *env. While I'd generally >> prefer inline functions, the macro would have the benefit that >> it could be #define-d / #undef-d right inside this case block. >> Thoughts? > > Code like this is large in terms of volume, but it is completely crystal > clear (with the requested comments in place) and easy to follow. > > I don't see how attempting to abstract out two small portions is going > to be an improvement. Okay, easier for me if I don't need to touch it. Roger, can you live with that? Jan
On 08.05.2020 18:21, Roger Pau Monné wrote: > On Fri, May 08, 2020 at 05:04:02PM +0200, Jan Beulich wrote: >> On 08.05.2020 15:37, Roger Pau Monné wrote: >>> On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote: >>>> --- a/tools/tests/x86_emulator/test_x86_emulator.c >>>> +++ b/tools/tests/x86_emulator/test_x86_emulator.c >>>> @@ -11648,6 +11651,89 @@ int x86_emul_blk( >>>> >>>> #ifndef X86EMUL_NO_FPU >>>> >>>> + case blk_fld: >>>> + ASSERT(!data); >>>> + >>>> + /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */ >>>> + switch ( bytes ) >>>> + { >>>> + case sizeof(fpstate.env): >>>> + case sizeof(fpstate): >>>> + memcpy(&fpstate.env, ptr, sizeof(fpstate.env)); >>>> + if ( !state->rex_prefix ) >>>> + { >>>> + unsigned int fip = fpstate.env.mode.real.fip_lo + >>>> + (fpstate.env.mode.real.fip_hi << 16); >>>> + unsigned int fdp = fpstate.env.mode.real.fdp_lo + >>>> + (fpstate.env.mode.real.fdp_hi << 16); >>>> + unsigned int fop = fpstate.env.mode.real.fop; >>>> + >>>> + fpstate.env.mode.prot.fip = fip & 0xf; >>>> + fpstate.env.mode.prot.fcs = fip >> 4; >>>> + fpstate.env.mode.prot.fop = fop; >>>> + fpstate.env.mode.prot.fdp = fdp & 0xf; >>>> + fpstate.env.mode.prot.fds = fdp >> 4; >>> >>> I've found the layouts in the SDM vol. 1, but I haven't been able to >>> found the translation mechanism from real to protected. Could you >>> maybe add a reference here? >> >> A reference to some piece of documentation? I don't think this >> is spelled out anywhere. It's also only one of various possible >> ways of doing the translation, but among them the most flexible >> one for possible consumers of the data (because of using the >> smallest possible offsets into the segments). > > Having this written down as a comment would help, but maybe that's > just because I'm not familiar at all with all this stuff. > > Again, likely a very stupid question, but I would expect: > > fpstate.env.mode.prot.fip = fip; > > Without the mask. How that? A linear address has many ways of decomposing into a real/vm86 mode ssss:oooo pair, but what you suggest is not one of them. The other extreme to the one chosen would be fpstate.env.mode.prot.fip = fip & 0xffff; fpstate.env.mode.prot.fcs = (fip >> 4) & 0xf000; Except that when doing it this way, even the full insn (or for fcs:fdp the full operand) may not be accessible through the resulting ssss, due to segment wraparound. Jan
On Mon, May 11, 2020 at 09:25:54AM +0200, Jan Beulich wrote: > [CAUTION - EXTERNAL EMAIL] DO NOT reply, click links, or open attachments unless you have verified the sender and know the content is safe. > > On 08.05.2020 20:29, Andrew Cooper wrote: > > On 08/05/2020 16:04, Jan Beulich wrote: > >>>> + } > >>>> + > >>>> + if ( bytes == sizeof(fpstate.env) ) > >>>> + ptr = NULL; > >>>> + else > >>>> + ptr += sizeof(fpstate.env); > >>>> + break; > >>>> + > >>>> + case sizeof(struct x87_env16): > >>>> + case sizeof(struct x87_env16) + sizeof(fpstate.freg): > >>>> + { > >>>> + const struct x87_env16 *env = ptr; > >>>> + > >>>> + fpstate.env.fcw = env->fcw; > >>>> + fpstate.env.fsw = env->fsw; > >>>> + fpstate.env.ftw = env->ftw; > >>>> + > >>>> + if ( state->rex_prefix ) > >>>> + { > >>>> + fpstate.env.mode.prot.fip = env->mode.prot.fip; > >>>> + fpstate.env.mode.prot.fcs = env->mode.prot.fcs; > >>>> + fpstate.env.mode.prot.fdp = env->mode.prot.fdp; > >>>> + fpstate.env.mode.prot.fds = env->mode.prot.fds; > >>>> + fpstate.env.mode.prot.fop = 0; /* unknown */ > >>>> + } > >>>> + else > >>>> + { > >>>> + unsigned int fip = env->mode.real.fip_lo + > >>>> + (env->mode.real.fip_hi << 16); > >>>> + unsigned int fdp = env->mode.real.fdp_lo + > >>>> + (env->mode.real.fdp_hi << 16); > >>>> + unsigned int fop = env->mode.real.fop; > >>>> + > >>>> + fpstate.env.mode.prot.fip = fip & 0xf; > >>>> + fpstate.env.mode.prot.fcs = fip >> 4; > >>>> + fpstate.env.mode.prot.fop = fop; > >>>> + fpstate.env.mode.prot.fdp = fdp & 0xf; > >>>> + fpstate.env.mode.prot.fds = fdp >> 4; > >>> This looks mostly the same as the translation done above, so maybe > >>> could be abstracted anyway in a macro to avoid the code repetition? > >>> (ie: fpstate_real_to_prot(src, dst) or some such). > >> Just the 5 assignments could be put in an inline function, but > >> if we also wanted to abstract away the declarations with their > >> initializers, it would need to be a macro because of the > >> different types of fpstate.env and *env. While I'd generally > >> prefer inline functions, the macro would have the benefit that > >> it could be #define-d / #undef-d right inside this case block. > >> Thoughts? > > > > Code like this is large in terms of volume, but it is completely crystal > > clear (with the requested comments in place) and easy to follow. > > > > I don't see how attempting to abstract out two small portions is going > > to be an improvement. > > Okay, easier for me if I don't need to touch it. Roger, can you > live with that? Yes, that's fine. Thanks.
On Mon, May 11, 2020 at 09:29:27AM +0200, Jan Beulich wrote: > On 08.05.2020 18:21, Roger Pau Monné wrote: > > On Fri, May 08, 2020 at 05:04:02PM +0200, Jan Beulich wrote: > >> On 08.05.2020 15:37, Roger Pau Monné wrote: > >>> On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote: > >>>> --- a/tools/tests/x86_emulator/test_x86_emulator.c > >>>> +++ b/tools/tests/x86_emulator/test_x86_emulator.c > >>>> @@ -11648,6 +11651,89 @@ int x86_emul_blk( > >>>> > >>>> #ifndef X86EMUL_NO_FPU > >>>> > >>>> + case blk_fld: > >>>> + ASSERT(!data); > >>>> + > >>>> + /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */ > >>>> + switch ( bytes ) > >>>> + { > >>>> + case sizeof(fpstate.env): > >>>> + case sizeof(fpstate): > >>>> + memcpy(&fpstate.env, ptr, sizeof(fpstate.env)); > >>>> + if ( !state->rex_prefix ) > >>>> + { > >>>> + unsigned int fip = fpstate.env.mode.real.fip_lo + > >>>> + (fpstate.env.mode.real.fip_hi << 16); > >>>> + unsigned int fdp = fpstate.env.mode.real.fdp_lo + > >>>> + (fpstate.env.mode.real.fdp_hi << 16); > >>>> + unsigned int fop = fpstate.env.mode.real.fop; > >>>> + > >>>> + fpstate.env.mode.prot.fip = fip & 0xf; > >>>> + fpstate.env.mode.prot.fcs = fip >> 4; > >>>> + fpstate.env.mode.prot.fop = fop; > >>>> + fpstate.env.mode.prot.fdp = fdp & 0xf; > >>>> + fpstate.env.mode.prot.fds = fdp >> 4; > >>> > >>> I've found the layouts in the SDM vol. 1, but I haven't been able to > >>> found the translation mechanism from real to protected. Could you > >>> maybe add a reference here? > >> > >> A reference to some piece of documentation? I don't think this > >> is spelled out anywhere. It's also only one of various possible > >> ways of doing the translation, but among them the most flexible > >> one for possible consumers of the data (because of using the > >> smallest possible offsets into the segments). > > > > Having this written down as a comment would help, but maybe that's > > just because I'm not familiar at all with all this stuff. > > > > Again, likely a very stupid question, but I would expect: > > > > fpstate.env.mode.prot.fip = fip; > > > > Without the mask. > > How that? A linear address has many ways of decomposing into a > real/vm86 mode ssss:oooo pair, but what you suggest is not one > of them. The other extreme to the one chosen would be > > fpstate.env.mode.prot.fip = fip & 0xffff; > fpstate.env.mode.prot.fcs = (fip >> 4) & 0xf000; > > Except that when doing it this way, even the full insn (or for > fcs:fdp the full operand) may not be accessible through the > resulting ssss, due to segment wraparound. Thanks for the explanation. I see it's better to split the offset into the lower 4 bytes only in order to prevent overflow. Roger.
--- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -2442,6 +2442,27 @@ int main(int argc, char **argv) else printf("skipped\n"); + printf("%-40s", "Testing fldenv 8(%edx)..."); + if ( stack_exec && cpu_has_fpu ) + { + asm volatile ( "fnstenv %0\n\t" + "fninit" + : "=m" (res[2]) :: "memory" ); + zap_fpsel(&res[2], true); + instr[0] = 0xd9; instr[1] = 0x62; instr[2] = 0x08; + regs.eip = (unsigned long)&instr[0]; + regs.edx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + asm volatile ( "fnstenv %0" : "=m" (res[9]) :: "memory" ); + if ( (rc != X86EMUL_OKAY) || + memcmp(res + 2, res + 9, 28) || + (regs.eip != (unsigned long)&instr[3]) ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + printf("%-40s", "Testing 16-bit fnsave (%ecx)..."); if ( stack_exec && cpu_has_fpu ) { @@ -2468,6 +2489,31 @@ int main(int argc, char **argv) goto fail; printf("okay\n"); } + else + printf("skipped\n"); + + printf("%-40s", "Testing frstor (%edx)..."); + if ( stack_exec && cpu_has_fpu ) + { + const uint16_t seven = 7; + + asm volatile ( "fninit\n\t" + "fld1\n\t" + "fidivs %1\n\t" + "fnsave %0\n\t" + : "=&m" (res[0]) : "m" (seven) : "memory" ); + zap_fpsel(&res[0], true); + instr[0] = 0xdd; instr[1] = 0x22; + regs.eip = (unsigned long)&instr[0]; + regs.edx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + asm volatile ( "fnsave %0" : "=m" (res[27]) :: "memory" ); + if ( (rc != X86EMUL_OKAY) || + memcmp(res, res + 27, 108) || + (regs.eip != (unsigned long)&instr[2]) ) + goto fail; + printf("okay\n"); + } else printf("skipped\n"); --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -857,6 +857,7 @@ struct x86_emulate_state { blk_NONE, blk_enqcmd, #ifndef X86EMUL_NO_FPU + blk_fld, /* FLDENV, FRSTOR */ blk_fst, /* FNSTENV, FNSAVE */ #endif blk_movdir, @@ -4948,21 +4949,14 @@ x86_emulate( dst.bytes = 4; emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val); break; - case 4: /* fldenv - TODO */ - state->fpu_ctrl = true; - goto unimplemented_insn; - case 5: /* fldcw m2byte */ - state->fpu_ctrl = true; - fpu_memsrc16: - if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val, - 2, ctxt)) != X86EMUL_OKAY ) - goto done; - emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val); - break; + case 4: /* fldenv */ + /* Raise #MF now if there are pending unmasked exceptions. */ + emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */); + /* fall through */ case 6: /* fnstenv */ fail_if(!ops->blk); - state->blk = blk_fst; - /* REX is meaningless for this insn by this point. */ + state->blk = modrm_reg & 2 ? blk_fst : blk_fld; + /* REX is meaningless for these insns by this point. */ rex_prefix = in_protmode(ctxt, ops); if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL, op_bytes > 2 ? sizeof(struct x87_env32) @@ -4972,6 +4966,14 @@ x86_emulate( goto done; state->fpu_ctrl = true; break; + case 5: /* fldcw m2byte */ + state->fpu_ctrl = true; + fpu_memsrc16: + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val, + 2, ctxt)) != X86EMUL_OKAY ) + goto done; + emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val); + break; case 7: /* fnstcw m2byte */ state->fpu_ctrl = true; fpu_memdst16: @@ -5124,13 +5126,14 @@ x86_emulate( dst.bytes = 8; emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val); break; - case 4: /* frstor - TODO */ - state->fpu_ctrl = true; - goto unimplemented_insn; + case 4: /* frstor */ + /* Raise #MF now if there are pending unmasked exceptions. */ + emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */); + /* fall through */ case 6: /* fnsave */ fail_if(!ops->blk); - state->blk = blk_fst; - /* REX is meaningless for this insn by this point. */ + state->blk = modrm_reg & 2 ? blk_fst : blk_fld; + /* REX is meaningless for these insns by this point. */ rex_prefix = in_protmode(ctxt, ops); if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL, op_bytes > 2 ? sizeof(struct x87_env32) + 80 @@ -11648,6 +11651,89 @@ int x86_emul_blk( #ifndef X86EMUL_NO_FPU + case blk_fld: + ASSERT(!data); + + /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */ + switch ( bytes ) + { + case sizeof(fpstate.env): + case sizeof(fpstate): + memcpy(&fpstate.env, ptr, sizeof(fpstate.env)); + if ( !state->rex_prefix ) + { + unsigned int fip = fpstate.env.mode.real.fip_lo + + (fpstate.env.mode.real.fip_hi << 16); + unsigned int fdp = fpstate.env.mode.real.fdp_lo + + (fpstate.env.mode.real.fdp_hi << 16); + unsigned int fop = fpstate.env.mode.real.fop; + + fpstate.env.mode.prot.fip = fip & 0xf; + fpstate.env.mode.prot.fcs = fip >> 4; + fpstate.env.mode.prot.fop = fop; + fpstate.env.mode.prot.fdp = fdp & 0xf; + fpstate.env.mode.prot.fds = fdp >> 4; + } + + if ( bytes == sizeof(fpstate.env) ) + ptr = NULL; + else + ptr += sizeof(fpstate.env); + break; + + case sizeof(struct x87_env16): + case sizeof(struct x87_env16) + sizeof(fpstate.freg): + { + const struct x87_env16 *env = ptr; + + fpstate.env.fcw = env->fcw; + fpstate.env.fsw = env->fsw; + fpstate.env.ftw = env->ftw; + + if ( state->rex_prefix ) + { + fpstate.env.mode.prot.fip = env->mode.prot.fip; + fpstate.env.mode.prot.fcs = env->mode.prot.fcs; + fpstate.env.mode.prot.fdp = env->mode.prot.fdp; + fpstate.env.mode.prot.fds = env->mode.prot.fds; + fpstate.env.mode.prot.fop = 0; /* unknown */ + } + else + { + unsigned int fip = env->mode.real.fip_lo + + (env->mode.real.fip_hi << 16); + unsigned int fdp = env->mode.real.fdp_lo + + (env->mode.real.fdp_hi << 16); + unsigned int fop = env->mode.real.fop; + + fpstate.env.mode.prot.fip = fip & 0xf; + fpstate.env.mode.prot.fcs = fip >> 4; + fpstate.env.mode.prot.fop = fop; + fpstate.env.mode.prot.fdp = fdp & 0xf; + fpstate.env.mode.prot.fds = fdp >> 4; + } + + if ( bytes == sizeof(*env) ) + ptr = NULL; + else + ptr += sizeof(*env); + break; + } + + default: + ASSERT_UNREACHABLE(); + return X86EMUL_UNHANDLEABLE; + } + + if ( ptr ) + { + memcpy(fpstate.freg, ptr, sizeof(fpstate.freg)); + asm volatile ( "frstor %0" :: "m" (fpstate) ); + } + else + asm volatile ( "fldenv %0" :: "m" (fpstate.env) ); + break; + case blk_fst: ASSERT(!data);
While the Intel SDM claims that FRSTOR itself may raise #MF upon completion, this was confirmed by Intel to be a doc error which will be corrected in due course; behavior is like FLDENV, and like old hard copy manuals describe it. Otherwise we'd have to emulate the insn by filling st(N) in suitable order, followed by FLDENV. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v7: New.