diff mbox series

[v8,08/12] x86emul: support FLDENV and FRSTOR

Message ID 09fe2c18-0037-af71-93be-87261051e2a2@suse.com (mailing list archive)
State Superseded
Headers show
Series x86emul: further work | expand

Commit Message

Jan Beulich May 5, 2020, 8:16 a.m. UTC
While the Intel SDM claims that FRSTOR itself may raise #MF upon
completion, this was confirmed by Intel to be a doc error which will be
corrected in due course; behavior is like FLDENV, and like old hard copy
manuals describe it. Otherwise we'd have to emulate the insn by filling
st(N) in suitable order, followed by FLDENV.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v7: New.

Comments

Roger Pau Monné May 8, 2020, 1:37 p.m. UTC | #1
On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote:
> While the Intel SDM claims that FRSTOR itself may raise #MF upon
> completion, this was confirmed by Intel to be a doc error which will be
> corrected in due course; behavior is like FLDENV, and like old hard copy
> manuals describe it. Otherwise we'd have to emulate the insn by filling
> st(N) in suitable order, followed by FLDENV.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v7: New.
> 
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -2442,6 +2442,27 @@ int main(int argc, char **argv)
>      else
>          printf("skipped\n");
>  
> +    printf("%-40s", "Testing fldenv 8(%edx)...");

Likely a stupid question, but why the added 8? edx will contain the
memory address used to save the sate by fnstenv, so I would expect
fldenv to just load from there?

> +    if ( stack_exec && cpu_has_fpu )
> +    {
> +        asm volatile ( "fnstenv %0\n\t"
> +                       "fninit"
> +                       : "=m" (res[2]) :: "memory" );

Why do you need the memory clobber here? I assume it's because res is
of type unsigned int and hence doesn't have the right size that
fnstenv will actually write to?

> +        zap_fpsel(&res[2], true);
> +        instr[0] = 0xd9; instr[1] = 0x62; instr[2] = 0x08;
> +        regs.eip = (unsigned long)&instr[0];
> +        regs.edx = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        asm volatile ( "fnstenv %0" : "=m" (res[9]) :: "memory" );
> +        if ( (rc != X86EMUL_OKAY) ||
> +             memcmp(res + 2, res + 9, 28) ||
> +             (regs.eip != (unsigned long)&instr[3]) )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
>      printf("%-40s", "Testing 16-bit fnsave (%ecx)...");
>      if ( stack_exec && cpu_has_fpu )
>      {
> @@ -2468,6 +2489,31 @@ int main(int argc, char **argv)
>              goto fail;
>          printf("okay\n");
>      }
> +    else
> +        printf("skipped\n");
> +
> +    printf("%-40s", "Testing frstor (%edx)...");
> +    if ( stack_exec && cpu_has_fpu )
> +    {
> +        const uint16_t seven = 7;
> +
> +        asm volatile ( "fninit\n\t"
> +                       "fld1\n\t"
> +                       "fidivs %1\n\t"
> +                       "fnsave %0\n\t"
> +                       : "=&m" (res[0]) : "m" (seven) : "memory" );
> +        zap_fpsel(&res[0], true);
> +        instr[0] = 0xdd; instr[1] = 0x22;
> +        regs.eip = (unsigned long)&instr[0];
> +        regs.edx = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        asm volatile ( "fnsave %0" : "=m" (res[27]) :: "memory" );
> +        if ( (rc != X86EMUL_OKAY) ||
> +             memcmp(res, res + 27, 108) ||
> +             (regs.eip != (unsigned long)&instr[2]) )
> +            goto fail;
> +        printf("okay\n");
> +    }
>      else
>          printf("skipped\n");
>  
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -857,6 +857,7 @@ struct x86_emulate_state {
>          blk_NONE,
>          blk_enqcmd,
>  #ifndef X86EMUL_NO_FPU
> +        blk_fld, /* FLDENV, FRSTOR */
>          blk_fst, /* FNSTENV, FNSAVE */
>  #endif
>          blk_movdir,
> @@ -4948,21 +4949,14 @@ x86_emulate(
>                  dst.bytes = 4;
>                  emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
>                  break;
> -            case 4: /* fldenv - TODO */
> -                state->fpu_ctrl = true;
> -                goto unimplemented_insn;
> -            case 5: /* fldcw m2byte */
> -                state->fpu_ctrl = true;
> -            fpu_memsrc16:
> -                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
> -                                     2, ctxt)) != X86EMUL_OKAY )
> -                    goto done;
> -                emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
> -                break;
> +            case 4: /* fldenv */
> +                /* Raise #MF now if there are pending unmasked exceptions. */
> +                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);

Maybe it would make sense to have a wrapper for fnop?

> +                /* fall through */
>              case 6: /* fnstenv */
>                  fail_if(!ops->blk);
> -                state->blk = blk_fst;
> -                /* REX is meaningless for this insn by this point. */
> +                state->blk = modrm_reg & 2 ? blk_fst : blk_fld;
> +                /* REX is meaningless for these insns by this point. */
>                  rex_prefix = in_protmode(ctxt, ops);
>                  if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL,
>                                      op_bytes > 2 ? sizeof(struct x87_env32)
> @@ -4972,6 +4966,14 @@ x86_emulate(
>                      goto done;
>                  state->fpu_ctrl = true;
>                  break;
> +            case 5: /* fldcw m2byte */
> +                state->fpu_ctrl = true;
> +            fpu_memsrc16:
> +                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
> +                                     2, ctxt)) != X86EMUL_OKAY )
> +                    goto done;
> +                emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
> +                break;
>              case 7: /* fnstcw m2byte */
>                  state->fpu_ctrl = true;
>              fpu_memdst16:
> @@ -5124,13 +5126,14 @@ x86_emulate(
>                  dst.bytes = 8;
>                  emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
>                  break;
> -            case 4: /* frstor - TODO */
> -                state->fpu_ctrl = true;
> -                goto unimplemented_insn;
> +            case 4: /* frstor */
> +                /* Raise #MF now if there are pending unmasked exceptions. */
> +                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
> +                /* fall through */
>              case 6: /* fnsave */
>                  fail_if(!ops->blk);
> -                state->blk = blk_fst;
> -                /* REX is meaningless for this insn by this point. */
> +                state->blk = modrm_reg & 2 ? blk_fst : blk_fld;
> +                /* REX is meaningless for these insns by this point. */
>                  rex_prefix = in_protmode(ctxt, ops);
>                  if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL,
>                                      op_bytes > 2 ? sizeof(struct x87_env32) + 80
> @@ -11648,6 +11651,89 @@ int x86_emul_blk(
>  
>  #ifndef X86EMUL_NO_FPU
>  
> +    case blk_fld:
> +        ASSERT(!data);
> +
> +        /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
> +        switch ( bytes )
> +        {
> +        case sizeof(fpstate.env):
> +        case sizeof(fpstate):
> +            memcpy(&fpstate.env, ptr, sizeof(fpstate.env));
> +            if ( !state->rex_prefix )
> +            {
> +                unsigned int fip = fpstate.env.mode.real.fip_lo +
> +                                   (fpstate.env.mode.real.fip_hi << 16);
> +                unsigned int fdp = fpstate.env.mode.real.fdp_lo +
> +                                   (fpstate.env.mode.real.fdp_hi << 16);
> +                unsigned int fop = fpstate.env.mode.real.fop;
> +
> +                fpstate.env.mode.prot.fip = fip & 0xf;
> +                fpstate.env.mode.prot.fcs = fip >> 4;
> +                fpstate.env.mode.prot.fop = fop;
> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
> +                fpstate.env.mode.prot.fds = fdp >> 4;

I've found the layouts in the SDM vol. 1, but I haven't been able to
found the translation mechanism from real to protected. Could you
maybe add a reference here?

> +            }
> +
> +            if ( bytes == sizeof(fpstate.env) )
> +                ptr = NULL;
> +            else
> +                ptr += sizeof(fpstate.env);
> +            break;
> +
> +        case sizeof(struct x87_env16):
> +        case sizeof(struct x87_env16) + sizeof(fpstate.freg):
> +        {
> +            const struct x87_env16 *env = ptr;
> +
> +            fpstate.env.fcw = env->fcw;
> +            fpstate.env.fsw = env->fsw;
> +            fpstate.env.ftw = env->ftw;
> +
> +            if ( state->rex_prefix )
> +            {
> +                fpstate.env.mode.prot.fip = env->mode.prot.fip;
> +                fpstate.env.mode.prot.fcs = env->mode.prot.fcs;
> +                fpstate.env.mode.prot.fdp = env->mode.prot.fdp;
> +                fpstate.env.mode.prot.fds = env->mode.prot.fds;
> +                fpstate.env.mode.prot.fop = 0; /* unknown */
> +            }
> +            else
> +            {
> +                unsigned int fip = env->mode.real.fip_lo +
> +                                   (env->mode.real.fip_hi << 16);
> +                unsigned int fdp = env->mode.real.fdp_lo +
> +                                   (env->mode.real.fdp_hi << 16);
> +                unsigned int fop = env->mode.real.fop;
> +
> +                fpstate.env.mode.prot.fip = fip & 0xf;
> +                fpstate.env.mode.prot.fcs = fip >> 4;
> +                fpstate.env.mode.prot.fop = fop;
> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
> +                fpstate.env.mode.prot.fds = fdp >> 4;

This looks mostly the same as the translation done above, so maybe
could be abstracted anyway in a macro to avoid the code repetition?
(ie: fpstate_real_to_prot(src, dst) or some such).

Thanks, Roger.
Jan Beulich May 8, 2020, 3:04 p.m. UTC | #2
On 08.05.2020 15:37, Roger Pau Monné wrote:
> On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote:
>> --- a/tools/tests/x86_emulator/test_x86_emulator.c
>> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
>> @@ -2442,6 +2442,27 @@ int main(int argc, char **argv)
>>      else
>>          printf("skipped\n");
>>  
>> +    printf("%-40s", "Testing fldenv 8(%edx)...");
> 
> Likely a stupid question, but why the added 8? edx will contain the
> memory address used to save the sate by fnstenv, so I would expect
> fldenv to just load from there?

The 8 is just to vary ModR/M encodings acrosss the various
tests - it's an arbitrary choice.

>> +    if ( stack_exec && cpu_has_fpu )
>> +    {
>> +        asm volatile ( "fnstenv %0\n\t"
>> +                       "fninit"
>> +                       : "=m" (res[2]) :: "memory" );
> 
> Why do you need the memory clobber here? I assume it's because res is
> of type unsigned int and hence doesn't have the right size that
> fnstenv will actually write to?

Correct.

>> @@ -4948,21 +4949,14 @@ x86_emulate(
>>                  dst.bytes = 4;
>>                  emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
>>                  break;
>> -            case 4: /* fldenv - TODO */
>> -                state->fpu_ctrl = true;
>> -                goto unimplemented_insn;
>> -            case 5: /* fldcw m2byte */
>> -                state->fpu_ctrl = true;
>> -            fpu_memsrc16:
>> -                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
>> -                                     2, ctxt)) != X86EMUL_OKAY )
>> -                    goto done;
>> -                emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
>> -                break;
>> +            case 4: /* fldenv */
>> +                /* Raise #MF now if there are pending unmasked exceptions. */
>> +                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
> 
> Maybe it would make sense to have a wrapper for fnop?

I'm not convinced that would be worth it.

>> @@ -11648,6 +11651,89 @@ int x86_emul_blk(
>>  
>>  #ifndef X86EMUL_NO_FPU
>>  
>> +    case blk_fld:
>> +        ASSERT(!data);
>> +
>> +        /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
>> +        switch ( bytes )
>> +        {
>> +        case sizeof(fpstate.env):
>> +        case sizeof(fpstate):
>> +            memcpy(&fpstate.env, ptr, sizeof(fpstate.env));
>> +            if ( !state->rex_prefix )
>> +            {
>> +                unsigned int fip = fpstate.env.mode.real.fip_lo +
>> +                                   (fpstate.env.mode.real.fip_hi << 16);
>> +                unsigned int fdp = fpstate.env.mode.real.fdp_lo +
>> +                                   (fpstate.env.mode.real.fdp_hi << 16);
>> +                unsigned int fop = fpstate.env.mode.real.fop;
>> +
>> +                fpstate.env.mode.prot.fip = fip & 0xf;
>> +                fpstate.env.mode.prot.fcs = fip >> 4;
>> +                fpstate.env.mode.prot.fop = fop;
>> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
>> +                fpstate.env.mode.prot.fds = fdp >> 4;
> 
> I've found the layouts in the SDM vol. 1, but I haven't been able to
> found the translation mechanism from real to protected. Could you
> maybe add a reference here?

A reference to some piece of documentation? I don't think this
is spelled out anywhere. It's also only one of various possible
ways of doing the translation, but among them the most flexible
one for possible consumers of the data (because of using the
smallest possible offsets into the segments).

>> +            }
>> +
>> +            if ( bytes == sizeof(fpstate.env) )
>> +                ptr = NULL;
>> +            else
>> +                ptr += sizeof(fpstate.env);
>> +            break;
>> +
>> +        case sizeof(struct x87_env16):
>> +        case sizeof(struct x87_env16) + sizeof(fpstate.freg):
>> +        {
>> +            const struct x87_env16 *env = ptr;
>> +
>> +            fpstate.env.fcw = env->fcw;
>> +            fpstate.env.fsw = env->fsw;
>> +            fpstate.env.ftw = env->ftw;
>> +
>> +            if ( state->rex_prefix )
>> +            {
>> +                fpstate.env.mode.prot.fip = env->mode.prot.fip;
>> +                fpstate.env.mode.prot.fcs = env->mode.prot.fcs;
>> +                fpstate.env.mode.prot.fdp = env->mode.prot.fdp;
>> +                fpstate.env.mode.prot.fds = env->mode.prot.fds;
>> +                fpstate.env.mode.prot.fop = 0; /* unknown */
>> +            }
>> +            else
>> +            {
>> +                unsigned int fip = env->mode.real.fip_lo +
>> +                                   (env->mode.real.fip_hi << 16);
>> +                unsigned int fdp = env->mode.real.fdp_lo +
>> +                                   (env->mode.real.fdp_hi << 16);
>> +                unsigned int fop = env->mode.real.fop;
>> +
>> +                fpstate.env.mode.prot.fip = fip & 0xf;
>> +                fpstate.env.mode.prot.fcs = fip >> 4;
>> +                fpstate.env.mode.prot.fop = fop;
>> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
>> +                fpstate.env.mode.prot.fds = fdp >> 4;
> 
> This looks mostly the same as the translation done above, so maybe
> could be abstracted anyway in a macro to avoid the code repetition?
> (ie: fpstate_real_to_prot(src, dst) or some such).

Just the 5 assignments could be put in an inline function, but
if we also wanted to abstract away the declarations with their
initializers, it would need to be a macro because of the
different types of fpstate.env and *env. While I'd generally
prefer inline functions, the macro would have the benefit that
it could be #define-d / #undef-d right inside this case block.
Thoughts? 

Jan
Roger Pau Monné May 8, 2020, 4:21 p.m. UTC | #3
On Fri, May 08, 2020 at 05:04:02PM +0200, Jan Beulich wrote:
> On 08.05.2020 15:37, Roger Pau Monné wrote:
> > On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote:
> >> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> >> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> >> @@ -11648,6 +11651,89 @@ int x86_emul_blk(
> >>  
> >>  #ifndef X86EMUL_NO_FPU
> >>  
> >> +    case blk_fld:
> >> +        ASSERT(!data);
> >> +
> >> +        /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
> >> +        switch ( bytes )
> >> +        {
> >> +        case sizeof(fpstate.env):
> >> +        case sizeof(fpstate):
> >> +            memcpy(&fpstate.env, ptr, sizeof(fpstate.env));
> >> +            if ( !state->rex_prefix )
> >> +            {
> >> +                unsigned int fip = fpstate.env.mode.real.fip_lo +
> >> +                                   (fpstate.env.mode.real.fip_hi << 16);
> >> +                unsigned int fdp = fpstate.env.mode.real.fdp_lo +
> >> +                                   (fpstate.env.mode.real.fdp_hi << 16);
> >> +                unsigned int fop = fpstate.env.mode.real.fop;
> >> +
> >> +                fpstate.env.mode.prot.fip = fip & 0xf;
> >> +                fpstate.env.mode.prot.fcs = fip >> 4;
> >> +                fpstate.env.mode.prot.fop = fop;
> >> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
> >> +                fpstate.env.mode.prot.fds = fdp >> 4;
> > 
> > I've found the layouts in the SDM vol. 1, but I haven't been able to
> > found the translation mechanism from real to protected. Could you
> > maybe add a reference here?
> 
> A reference to some piece of documentation? I don't think this
> is spelled out anywhere. It's also only one of various possible
> ways of doing the translation, but among them the most flexible
> one for possible consumers of the data (because of using the
> smallest possible offsets into the segments).

Having this written down as a comment would help, but maybe that's
just because I'm not familiar at all with all this stuff.

Again, likely a very stupid question, but I would expect:

fpstate.env.mode.prot.fip = fip;

Without the mask.

> >> +            }
> >> +
> >> +            if ( bytes == sizeof(fpstate.env) )
> >> +                ptr = NULL;
> >> +            else
> >> +                ptr += sizeof(fpstate.env);
> >> +            break;
> >> +
> >> +        case sizeof(struct x87_env16):
> >> +        case sizeof(struct x87_env16) + sizeof(fpstate.freg):
> >> +        {
> >> +            const struct x87_env16 *env = ptr;
> >> +
> >> +            fpstate.env.fcw = env->fcw;
> >> +            fpstate.env.fsw = env->fsw;
> >> +            fpstate.env.ftw = env->ftw;
> >> +
> >> +            if ( state->rex_prefix )
> >> +            {
> >> +                fpstate.env.mode.prot.fip = env->mode.prot.fip;
> >> +                fpstate.env.mode.prot.fcs = env->mode.prot.fcs;
> >> +                fpstate.env.mode.prot.fdp = env->mode.prot.fdp;
> >> +                fpstate.env.mode.prot.fds = env->mode.prot.fds;
> >> +                fpstate.env.mode.prot.fop = 0; /* unknown */
> >> +            }
> >> +            else
> >> +            {
> >> +                unsigned int fip = env->mode.real.fip_lo +
> >> +                                   (env->mode.real.fip_hi << 16);
> >> +                unsigned int fdp = env->mode.real.fdp_lo +
> >> +                                   (env->mode.real.fdp_hi << 16);
> >> +                unsigned int fop = env->mode.real.fop;
> >> +
> >> +                fpstate.env.mode.prot.fip = fip & 0xf;
> >> +                fpstate.env.mode.prot.fcs = fip >> 4;
> >> +                fpstate.env.mode.prot.fop = fop;
> >> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
> >> +                fpstate.env.mode.prot.fds = fdp >> 4;
> > 
> > This looks mostly the same as the translation done above, so maybe
> > could be abstracted anyway in a macro to avoid the code repetition?
> > (ie: fpstate_real_to_prot(src, dst) or some such).
> 
> Just the 5 assignments could be put in an inline function, but
> if we also wanted to abstract away the declarations with their
> initializers, it would need to be a macro because of the
> different types of fpstate.env and *env. While I'd generally
> prefer inline functions, the macro would have the benefit that
> it could be #define-d / #undef-d right inside this case block.
> Thoughts? 

I think a macro would be fine IMO.

Thanks, Roger.
Andrew Cooper May 8, 2020, 6:19 p.m. UTC | #4
On 05/05/2020 09:16, Jan Beulich wrote:
> While the Intel SDM claims that FRSTOR itself may raise #MF upon
> completion, this was confirmed by Intel to be a doc error which will be
> corrected in due course; behavior is like FLDENV, and like old hard copy
> manuals describe it. Otherwise we'd have to emulate the insn by filling
> st(N) in suitable order, followed by FLDENV.

I wouldn't bother calling this out at all.  We know the doc is going to
be corrected in the next revision, and "what we would have done if the
docs error was in fact accurate" only adds confusion to an already
complicated change.

> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -857,6 +857,7 @@ struct x86_emulate_state {
>          blk_NONE,
>          blk_enqcmd,
>  #ifndef X86EMUL_NO_FPU
> +        blk_fld, /* FLDENV, FRSTOR */
>          blk_fst, /* FNSTENV, FNSAVE */
>  #endif
>          blk_movdir,
> @@ -4948,21 +4949,14 @@ x86_emulate(
>                  dst.bytes = 4;
>                  emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
>                  break;
> -            case 4: /* fldenv - TODO */
> -                state->fpu_ctrl = true;
> -                goto unimplemented_insn;
> -            case 5: /* fldcw m2byte */
> -                state->fpu_ctrl = true;
> -            fpu_memsrc16:
> -                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
> -                                     2, ctxt)) != X86EMUL_OKAY )
> -                    goto done;
> -                emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
> -                break;

The commit message should however note that the larger-than-expected
diff is purely code motion for case 5, to arrange fldenv to fall though
into fstenv.

Alternatively, could be pulled out into an earlier patch.

> +            case 4: /* fldenv */
> +                /* Raise #MF now if there are pending unmasked exceptions. */
> +                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
> +                /* fall through */
>              case 6: /* fnstenv */
>                  fail_if(!ops->blk);
> -                state->blk = blk_fst;
> -                /* REX is meaningless for this insn by this point. */
> +                state->blk = modrm_reg & 2 ? blk_fst : blk_fld;
> +                /* REX is meaningless for these insns by this point. */
>                  rex_prefix = in_protmode(ctxt, ops);
>                  if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL,
>                                      op_bytes > 2 ? sizeof(struct x87_env32)
> @@ -4972,6 +4966,14 @@ x86_emulate(
>                      goto done;
>                  state->fpu_ctrl = true;
>                  break;
> +            case 5: /* fldcw m2byte */
> +                state->fpu_ctrl = true;
> +            fpu_memsrc16:
> +                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
> +                                     2, ctxt)) != X86EMUL_OKAY )
> +                    goto done;
> +                emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
> +                break;
>              case 7: /* fnstcw m2byte */
>                  state->fpu_ctrl = true;
>              fpu_memdst16:
> @@ -5124,13 +5126,14 @@ x86_emulate(
>                  dst.bytes = 8;
>                  emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
>                  break;
> -            case 4: /* frstor - TODO */
> -                state->fpu_ctrl = true;
> -                goto unimplemented_insn;
> +            case 4: /* frstor */
> +                /* Raise #MF now if there are pending unmasked exceptions. */
> +                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
> +                /* fall through */
>              case 6: /* fnsave */
>                  fail_if(!ops->blk);
> -                state->blk = blk_fst;
> -                /* REX is meaningless for this insn by this point. */
> +                state->blk = modrm_reg & 2 ? blk_fst : blk_fld;
> +                /* REX is meaningless for these insns by this point. */
>                  rex_prefix = in_protmode(ctxt, ops);
>                  if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL,
>                                      op_bytes > 2 ? sizeof(struct x87_env32) + 80
> @@ -11648,6 +11651,89 @@ int x86_emul_blk(
>  
>  #ifndef X86EMUL_NO_FPU
>  
> +    case blk_fld:
> +        ASSERT(!data);
> +
> +        /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
> +        switch ( bytes )
> +        {
> +        case sizeof(fpstate.env):
> +        case sizeof(fpstate):
> +            memcpy(&fpstate.env, ptr, sizeof(fpstate.env));
> +            if ( !state->rex_prefix )
> +            {
> +                unsigned int fip = fpstate.env.mode.real.fip_lo +
> +                                   (fpstate.env.mode.real.fip_hi << 16);
> +                unsigned int fdp = fpstate.env.mode.real.fdp_lo +
> +                                   (fpstate.env.mode.real.fdp_hi << 16);
> +                unsigned int fop = fpstate.env.mode.real.fop;
> +
> +                fpstate.env.mode.prot.fip = fip & 0xf;
> +                fpstate.env.mode.prot.fcs = fip >> 4;
> +                fpstate.env.mode.prot.fop = fop;
> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
> +                fpstate.env.mode.prot.fds = fdp >> 4;

The same comments about comments apply here from the previous patch.

~Andrew
Andrew Cooper May 8, 2020, 6:29 p.m. UTC | #5
On 08/05/2020 16:04, Jan Beulich wrote:
>>> +            }
>>> +
>>> +            if ( bytes == sizeof(fpstate.env) )
>>> +                ptr = NULL;
>>> +            else
>>> +                ptr += sizeof(fpstate.env);
>>> +            break;
>>> +
>>> +        case sizeof(struct x87_env16):
>>> +        case sizeof(struct x87_env16) + sizeof(fpstate.freg):
>>> +        {
>>> +            const struct x87_env16 *env = ptr;
>>> +
>>> +            fpstate.env.fcw = env->fcw;
>>> +            fpstate.env.fsw = env->fsw;
>>> +            fpstate.env.ftw = env->ftw;
>>> +
>>> +            if ( state->rex_prefix )
>>> +            {
>>> +                fpstate.env.mode.prot.fip = env->mode.prot.fip;
>>> +                fpstate.env.mode.prot.fcs = env->mode.prot.fcs;
>>> +                fpstate.env.mode.prot.fdp = env->mode.prot.fdp;
>>> +                fpstate.env.mode.prot.fds = env->mode.prot.fds;
>>> +                fpstate.env.mode.prot.fop = 0; /* unknown */
>>> +            }
>>> +            else
>>> +            {
>>> +                unsigned int fip = env->mode.real.fip_lo +
>>> +                                   (env->mode.real.fip_hi << 16);
>>> +                unsigned int fdp = env->mode.real.fdp_lo +
>>> +                                   (env->mode.real.fdp_hi << 16);
>>> +                unsigned int fop = env->mode.real.fop;
>>> +
>>> +                fpstate.env.mode.prot.fip = fip & 0xf;
>>> +                fpstate.env.mode.prot.fcs = fip >> 4;
>>> +                fpstate.env.mode.prot.fop = fop;
>>> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
>>> +                fpstate.env.mode.prot.fds = fdp >> 4;
>> This looks mostly the same as the translation done above, so maybe
>> could be abstracted anyway in a macro to avoid the code repetition?
>> (ie: fpstate_real_to_prot(src, dst) or some such).
> Just the 5 assignments could be put in an inline function, but
> if we also wanted to abstract away the declarations with their
> initializers, it would need to be a macro because of the
> different types of fpstate.env and *env. While I'd generally
> prefer inline functions, the macro would have the benefit that
> it could be #define-d / #undef-d right inside this case block.
> Thoughts?

Code like this is large in terms of volume, but it is completely crystal
clear (with the requested comments in place) and easy to follow.

I don't see how attempting to abstract out two small portions is going
to be an improvement.

~Andrew
Jan Beulich May 11, 2020, 7:25 a.m. UTC | #6
On 08.05.2020 20:29, Andrew Cooper wrote:
> On 08/05/2020 16:04, Jan Beulich wrote:
>>>> +            }
>>>> +
>>>> +            if ( bytes == sizeof(fpstate.env) )
>>>> +                ptr = NULL;
>>>> +            else
>>>> +                ptr += sizeof(fpstate.env);
>>>> +            break;
>>>> +
>>>> +        case sizeof(struct x87_env16):
>>>> +        case sizeof(struct x87_env16) + sizeof(fpstate.freg):
>>>> +        {
>>>> +            const struct x87_env16 *env = ptr;
>>>> +
>>>> +            fpstate.env.fcw = env->fcw;
>>>> +            fpstate.env.fsw = env->fsw;
>>>> +            fpstate.env.ftw = env->ftw;
>>>> +
>>>> +            if ( state->rex_prefix )
>>>> +            {
>>>> +                fpstate.env.mode.prot.fip = env->mode.prot.fip;
>>>> +                fpstate.env.mode.prot.fcs = env->mode.prot.fcs;
>>>> +                fpstate.env.mode.prot.fdp = env->mode.prot.fdp;
>>>> +                fpstate.env.mode.prot.fds = env->mode.prot.fds;
>>>> +                fpstate.env.mode.prot.fop = 0; /* unknown */
>>>> +            }
>>>> +            else
>>>> +            {
>>>> +                unsigned int fip = env->mode.real.fip_lo +
>>>> +                                   (env->mode.real.fip_hi << 16);
>>>> +                unsigned int fdp = env->mode.real.fdp_lo +
>>>> +                                   (env->mode.real.fdp_hi << 16);
>>>> +                unsigned int fop = env->mode.real.fop;
>>>> +
>>>> +                fpstate.env.mode.prot.fip = fip & 0xf;
>>>> +                fpstate.env.mode.prot.fcs = fip >> 4;
>>>> +                fpstate.env.mode.prot.fop = fop;
>>>> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
>>>> +                fpstate.env.mode.prot.fds = fdp >> 4;
>>> This looks mostly the same as the translation done above, so maybe
>>> could be abstracted anyway in a macro to avoid the code repetition?
>>> (ie: fpstate_real_to_prot(src, dst) or some such).
>> Just the 5 assignments could be put in an inline function, but
>> if we also wanted to abstract away the declarations with their
>> initializers, it would need to be a macro because of the
>> different types of fpstate.env and *env. While I'd generally
>> prefer inline functions, the macro would have the benefit that
>> it could be #define-d / #undef-d right inside this case block.
>> Thoughts?
> 
> Code like this is large in terms of volume, but it is completely crystal
> clear (with the requested comments in place) and easy to follow.
> 
> I don't see how attempting to abstract out two small portions is going
> to be an improvement.

Okay, easier for me if I don't need to touch it. Roger, can you
live with that?

Jan
Jan Beulich May 11, 2020, 7:29 a.m. UTC | #7
On 08.05.2020 18:21, Roger Pau Monné wrote:
> On Fri, May 08, 2020 at 05:04:02PM +0200, Jan Beulich wrote:
>> On 08.05.2020 15:37, Roger Pau Monné wrote:
>>> On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote:
>>>> --- a/tools/tests/x86_emulator/test_x86_emulator.c
>>>> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
>>>> @@ -11648,6 +11651,89 @@ int x86_emul_blk(
>>>>  
>>>>  #ifndef X86EMUL_NO_FPU
>>>>  
>>>> +    case blk_fld:
>>>> +        ASSERT(!data);
>>>> +
>>>> +        /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
>>>> +        switch ( bytes )
>>>> +        {
>>>> +        case sizeof(fpstate.env):
>>>> +        case sizeof(fpstate):
>>>> +            memcpy(&fpstate.env, ptr, sizeof(fpstate.env));
>>>> +            if ( !state->rex_prefix )
>>>> +            {
>>>> +                unsigned int fip = fpstate.env.mode.real.fip_lo +
>>>> +                                   (fpstate.env.mode.real.fip_hi << 16);
>>>> +                unsigned int fdp = fpstate.env.mode.real.fdp_lo +
>>>> +                                   (fpstate.env.mode.real.fdp_hi << 16);
>>>> +                unsigned int fop = fpstate.env.mode.real.fop;
>>>> +
>>>> +                fpstate.env.mode.prot.fip = fip & 0xf;
>>>> +                fpstate.env.mode.prot.fcs = fip >> 4;
>>>> +                fpstate.env.mode.prot.fop = fop;
>>>> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
>>>> +                fpstate.env.mode.prot.fds = fdp >> 4;
>>>
>>> I've found the layouts in the SDM vol. 1, but I haven't been able to
>>> found the translation mechanism from real to protected. Could you
>>> maybe add a reference here?
>>
>> A reference to some piece of documentation? I don't think this
>> is spelled out anywhere. It's also only one of various possible
>> ways of doing the translation, but among them the most flexible
>> one for possible consumers of the data (because of using the
>> smallest possible offsets into the segments).
> 
> Having this written down as a comment would help, but maybe that's
> just because I'm not familiar at all with all this stuff.
> 
> Again, likely a very stupid question, but I would expect:
> 
> fpstate.env.mode.prot.fip = fip;
> 
> Without the mask.

How that? A linear address has many ways of decomposing into a
real/vm86 mode ssss:oooo pair, but what you suggest is not one
of them. The other extreme to the one chosen would be

                fpstate.env.mode.prot.fip = fip & 0xffff;
                fpstate.env.mode.prot.fcs = (fip >> 4) & 0xf000;

Except that when doing it this way, even the full insn (or for
fcs:fdp the full operand) may not be accessible through the
resulting ssss, due to segment wraparound.

Jan
Roger Pau Monné May 11, 2020, 8:02 a.m. UTC | #8
On Mon, May 11, 2020 at 09:25:54AM +0200, Jan Beulich wrote:
> [CAUTION - EXTERNAL EMAIL] DO NOT reply, click links, or open attachments unless you have verified the sender and know the content is safe.
> 
> On 08.05.2020 20:29, Andrew Cooper wrote:
> > On 08/05/2020 16:04, Jan Beulich wrote:
> >>>> +            }
> >>>> +
> >>>> +            if ( bytes == sizeof(fpstate.env) )
> >>>> +                ptr = NULL;
> >>>> +            else
> >>>> +                ptr += sizeof(fpstate.env);
> >>>> +            break;
> >>>> +
> >>>> +        case sizeof(struct x87_env16):
> >>>> +        case sizeof(struct x87_env16) + sizeof(fpstate.freg):
> >>>> +        {
> >>>> +            const struct x87_env16 *env = ptr;
> >>>> +
> >>>> +            fpstate.env.fcw = env->fcw;
> >>>> +            fpstate.env.fsw = env->fsw;
> >>>> +            fpstate.env.ftw = env->ftw;
> >>>> +
> >>>> +            if ( state->rex_prefix )
> >>>> +            {
> >>>> +                fpstate.env.mode.prot.fip = env->mode.prot.fip;
> >>>> +                fpstate.env.mode.prot.fcs = env->mode.prot.fcs;
> >>>> +                fpstate.env.mode.prot.fdp = env->mode.prot.fdp;
> >>>> +                fpstate.env.mode.prot.fds = env->mode.prot.fds;
> >>>> +                fpstate.env.mode.prot.fop = 0; /* unknown */
> >>>> +            }
> >>>> +            else
> >>>> +            {
> >>>> +                unsigned int fip = env->mode.real.fip_lo +
> >>>> +                                   (env->mode.real.fip_hi << 16);
> >>>> +                unsigned int fdp = env->mode.real.fdp_lo +
> >>>> +                                   (env->mode.real.fdp_hi << 16);
> >>>> +                unsigned int fop = env->mode.real.fop;
> >>>> +
> >>>> +                fpstate.env.mode.prot.fip = fip & 0xf;
> >>>> +                fpstate.env.mode.prot.fcs = fip >> 4;
> >>>> +                fpstate.env.mode.prot.fop = fop;
> >>>> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
> >>>> +                fpstate.env.mode.prot.fds = fdp >> 4;
> >>> This looks mostly the same as the translation done above, so maybe
> >>> could be abstracted anyway in a macro to avoid the code repetition?
> >>> (ie: fpstate_real_to_prot(src, dst) or some such).
> >> Just the 5 assignments could be put in an inline function, but
> >> if we also wanted to abstract away the declarations with their
> >> initializers, it would need to be a macro because of the
> >> different types of fpstate.env and *env. While I'd generally
> >> prefer inline functions, the macro would have the benefit that
> >> it could be #define-d / #undef-d right inside this case block.
> >> Thoughts?
> > 
> > Code like this is large in terms of volume, but it is completely crystal
> > clear (with the requested comments in place) and easy to follow.
> > 
> > I don't see how attempting to abstract out two small portions is going
> > to be an improvement.
> 
> Okay, easier for me if I don't need to touch it. Roger, can you
> live with that?

Yes, that's fine.

Thanks.
Roger Pau Monné May 11, 2020, 9:22 a.m. UTC | #9
On Mon, May 11, 2020 at 09:29:27AM +0200, Jan Beulich wrote:
> On 08.05.2020 18:21, Roger Pau Monné wrote:
> > On Fri, May 08, 2020 at 05:04:02PM +0200, Jan Beulich wrote:
> >> On 08.05.2020 15:37, Roger Pau Monné wrote:
> >>> On Tue, May 05, 2020 at 10:16:20AM +0200, Jan Beulich wrote:
> >>>> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> >>>> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> >>>> @@ -11648,6 +11651,89 @@ int x86_emul_blk(
> >>>>  
> >>>>  #ifndef X86EMUL_NO_FPU
> >>>>  
> >>>> +    case blk_fld:
> >>>> +        ASSERT(!data);
> >>>> +
> >>>> +        /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
> >>>> +        switch ( bytes )
> >>>> +        {
> >>>> +        case sizeof(fpstate.env):
> >>>> +        case sizeof(fpstate):
> >>>> +            memcpy(&fpstate.env, ptr, sizeof(fpstate.env));
> >>>> +            if ( !state->rex_prefix )
> >>>> +            {
> >>>> +                unsigned int fip = fpstate.env.mode.real.fip_lo +
> >>>> +                                   (fpstate.env.mode.real.fip_hi << 16);
> >>>> +                unsigned int fdp = fpstate.env.mode.real.fdp_lo +
> >>>> +                                   (fpstate.env.mode.real.fdp_hi << 16);
> >>>> +                unsigned int fop = fpstate.env.mode.real.fop;
> >>>> +
> >>>> +                fpstate.env.mode.prot.fip = fip & 0xf;
> >>>> +                fpstate.env.mode.prot.fcs = fip >> 4;
> >>>> +                fpstate.env.mode.prot.fop = fop;
> >>>> +                fpstate.env.mode.prot.fdp = fdp & 0xf;
> >>>> +                fpstate.env.mode.prot.fds = fdp >> 4;
> >>>
> >>> I've found the layouts in the SDM vol. 1, but I haven't been able to
> >>> found the translation mechanism from real to protected. Could you
> >>> maybe add a reference here?
> >>
> >> A reference to some piece of documentation? I don't think this
> >> is spelled out anywhere. It's also only one of various possible
> >> ways of doing the translation, but among them the most flexible
> >> one for possible consumers of the data (because of using the
> >> smallest possible offsets into the segments).
> > 
> > Having this written down as a comment would help, but maybe that's
> > just because I'm not familiar at all with all this stuff.
> > 
> > Again, likely a very stupid question, but I would expect:
> > 
> > fpstate.env.mode.prot.fip = fip;
> > 
> > Without the mask.
> 
> How that? A linear address has many ways of decomposing into a
> real/vm86 mode ssss:oooo pair, but what you suggest is not one
> of them. The other extreme to the one chosen would be
> 
>                 fpstate.env.mode.prot.fip = fip & 0xffff;
>                 fpstate.env.mode.prot.fcs = (fip >> 4) & 0xf000;
> 
> Except that when doing it this way, even the full insn (or for
> fcs:fdp the full operand) may not be accessible through the
> resulting ssss, due to segment wraparound.

Thanks for the explanation. I see it's better to split the offset into
the lower 4 bytes only in order to prevent overflow.

Roger.
diff mbox series

Patch

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -2442,6 +2442,27 @@  int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing fldenv 8(%edx)...");
+    if ( stack_exec && cpu_has_fpu )
+    {
+        asm volatile ( "fnstenv %0\n\t"
+                       "fninit"
+                       : "=m" (res[2]) :: "memory" );
+        zap_fpsel(&res[2], true);
+        instr[0] = 0xd9; instr[1] = 0x62; instr[2] = 0x08;
+        regs.eip = (unsigned long)&instr[0];
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        asm volatile ( "fnstenv %0" : "=m" (res[9]) :: "memory" );
+        if ( (rc != X86EMUL_OKAY) ||
+             memcmp(res + 2, res + 9, 28) ||
+             (regs.eip != (unsigned long)&instr[3]) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing 16-bit fnsave (%ecx)...");
     if ( stack_exec && cpu_has_fpu )
     {
@@ -2468,6 +2489,31 @@  int main(int argc, char **argv)
             goto fail;
         printf("okay\n");
     }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing frstor (%edx)...");
+    if ( stack_exec && cpu_has_fpu )
+    {
+        const uint16_t seven = 7;
+
+        asm volatile ( "fninit\n\t"
+                       "fld1\n\t"
+                       "fidivs %1\n\t"
+                       "fnsave %0\n\t"
+                       : "=&m" (res[0]) : "m" (seven) : "memory" );
+        zap_fpsel(&res[0], true);
+        instr[0] = 0xdd; instr[1] = 0x22;
+        regs.eip = (unsigned long)&instr[0];
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        asm volatile ( "fnsave %0" : "=m" (res[27]) :: "memory" );
+        if ( (rc != X86EMUL_OKAY) ||
+             memcmp(res, res + 27, 108) ||
+             (regs.eip != (unsigned long)&instr[2]) )
+            goto fail;
+        printf("okay\n");
+    }
     else
         printf("skipped\n");
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -857,6 +857,7 @@  struct x86_emulate_state {
         blk_NONE,
         blk_enqcmd,
 #ifndef X86EMUL_NO_FPU
+        blk_fld, /* FLDENV, FRSTOR */
         blk_fst, /* FNSTENV, FNSAVE */
 #endif
         blk_movdir,
@@ -4948,21 +4949,14 @@  x86_emulate(
                 dst.bytes = 4;
                 emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
                 break;
-            case 4: /* fldenv - TODO */
-                state->fpu_ctrl = true;
-                goto unimplemented_insn;
-            case 5: /* fldcw m2byte */
-                state->fpu_ctrl = true;
-            fpu_memsrc16:
-                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
-                                     2, ctxt)) != X86EMUL_OKAY )
-                    goto done;
-                emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
-                break;
+            case 4: /* fldenv */
+                /* Raise #MF now if there are pending unmasked exceptions. */
+                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
+                /* fall through */
             case 6: /* fnstenv */
                 fail_if(!ops->blk);
-                state->blk = blk_fst;
-                /* REX is meaningless for this insn by this point. */
+                state->blk = modrm_reg & 2 ? blk_fst : blk_fld;
+                /* REX is meaningless for these insns by this point. */
                 rex_prefix = in_protmode(ctxt, ops);
                 if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL,
                                     op_bytes > 2 ? sizeof(struct x87_env32)
@@ -4972,6 +4966,14 @@  x86_emulate(
                     goto done;
                 state->fpu_ctrl = true;
                 break;
+            case 5: /* fldcw m2byte */
+                state->fpu_ctrl = true;
+            fpu_memsrc16:
+                if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
+                                     2, ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                emulate_fpu_insn_memsrc(b, modrm_reg & 7, src.val);
+                break;
             case 7: /* fnstcw m2byte */
                 state->fpu_ctrl = true;
             fpu_memdst16:
@@ -5124,13 +5126,14 @@  x86_emulate(
                 dst.bytes = 8;
                 emulate_fpu_insn_memdst(b, modrm_reg & 7, dst.val);
                 break;
-            case 4: /* frstor - TODO */
-                state->fpu_ctrl = true;
-                goto unimplemented_insn;
+            case 4: /* frstor */
+                /* Raise #MF now if there are pending unmasked exceptions. */
+                emulate_fpu_insn_stub(0xd9, 0xd0 /* fnop */);
+                /* fall through */
             case 6: /* fnsave */
                 fail_if(!ops->blk);
-                state->blk = blk_fst;
-                /* REX is meaningless for this insn by this point. */
+                state->blk = modrm_reg & 2 ? blk_fst : blk_fld;
+                /* REX is meaningless for these insns by this point. */
                 rex_prefix = in_protmode(ctxt, ops);
                 if ( (rc = ops->blk(ea.mem.seg, ea.mem.off, NULL,
                                     op_bytes > 2 ? sizeof(struct x87_env32) + 80
@@ -11648,6 +11651,89 @@  int x86_emul_blk(
 
 #ifndef X86EMUL_NO_FPU
 
+    case blk_fld:
+        ASSERT(!data);
+
+        /* state->rex_prefix carries CR0.PE && !EFLAGS.VM setting */
+        switch ( bytes )
+        {
+        case sizeof(fpstate.env):
+        case sizeof(fpstate):
+            memcpy(&fpstate.env, ptr, sizeof(fpstate.env));
+            if ( !state->rex_prefix )
+            {
+                unsigned int fip = fpstate.env.mode.real.fip_lo +
+                                   (fpstate.env.mode.real.fip_hi << 16);
+                unsigned int fdp = fpstate.env.mode.real.fdp_lo +
+                                   (fpstate.env.mode.real.fdp_hi << 16);
+                unsigned int fop = fpstate.env.mode.real.fop;
+
+                fpstate.env.mode.prot.fip = fip & 0xf;
+                fpstate.env.mode.prot.fcs = fip >> 4;
+                fpstate.env.mode.prot.fop = fop;
+                fpstate.env.mode.prot.fdp = fdp & 0xf;
+                fpstate.env.mode.prot.fds = fdp >> 4;
+            }
+
+            if ( bytes == sizeof(fpstate.env) )
+                ptr = NULL;
+            else
+                ptr += sizeof(fpstate.env);
+            break;
+
+        case sizeof(struct x87_env16):
+        case sizeof(struct x87_env16) + sizeof(fpstate.freg):
+        {
+            const struct x87_env16 *env = ptr;
+
+            fpstate.env.fcw = env->fcw;
+            fpstate.env.fsw = env->fsw;
+            fpstate.env.ftw = env->ftw;
+
+            if ( state->rex_prefix )
+            {
+                fpstate.env.mode.prot.fip = env->mode.prot.fip;
+                fpstate.env.mode.prot.fcs = env->mode.prot.fcs;
+                fpstate.env.mode.prot.fdp = env->mode.prot.fdp;
+                fpstate.env.mode.prot.fds = env->mode.prot.fds;
+                fpstate.env.mode.prot.fop = 0; /* unknown */
+            }
+            else
+            {
+                unsigned int fip = env->mode.real.fip_lo +
+                                   (env->mode.real.fip_hi << 16);
+                unsigned int fdp = env->mode.real.fdp_lo +
+                                   (env->mode.real.fdp_hi << 16);
+                unsigned int fop = env->mode.real.fop;
+
+                fpstate.env.mode.prot.fip = fip & 0xf;
+                fpstate.env.mode.prot.fcs = fip >> 4;
+                fpstate.env.mode.prot.fop = fop;
+                fpstate.env.mode.prot.fdp = fdp & 0xf;
+                fpstate.env.mode.prot.fds = fdp >> 4;
+            }
+
+            if ( bytes == sizeof(*env) )
+                ptr = NULL;
+            else
+                ptr += sizeof(*env);
+            break;
+        }
+
+        default:
+            ASSERT_UNREACHABLE();
+            return X86EMUL_UNHANDLEABLE;
+        }
+
+        if ( ptr )
+        {
+            memcpy(fpstate.freg, ptr, sizeof(fpstate.freg));
+            asm volatile ( "frstor %0" :: "m" (fpstate) );
+        }
+        else
+            asm volatile ( "fldenv %0" :: "m" (fpstate.env) );
+        break;
+
     case blk_fst:
         ASSERT(!data);