diff mbox series

[2/7] x86/emul: Fix and extend #DB trap handling

Message ID 20230915203628.837732-3-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show
Series x86/pv: #DB vs %dr6 fixes, part 2 | expand

Commit Message

Andrew Cooper Sept. 15, 2023, 8:36 p.m. UTC
Lots of this is very very broken, but we need to start somewhere.

First, the bugfix.  Hooks which use X86EMUL_DONE to skip the general emulation
still need to evaluate singlestep as part of completing the instruction.
Defer the logic until X86EMUL_DONE has been converted to X86EMUL_OKAY.

Second, the improvement.  PENDING_DBG, INTERRUPTIBILITY and ACTIVITY are
internal pipeline state which Intel exposed to software in the VMCS, and AMD
exposed a subset of in the VMCB.  Importantly, bits set in PENDING_DBG can
survive across multiple instruction boundaries if e.g. delivery of #DB is
delayed by a MovSS.

For now, introduce a full pending_dbg field into the retire union.  This keeps
the sh_page_fault() and init_context() paths working but in due course the
field will want to lose the "retire" infix.

In addition, set singlestep into pending_dbg as appropriate.  Leave the old
singlestep bitfield in place until we can adjust the callers to the new
scheme.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Jinoh Kang <jinoh.kang.kr@gmail.com>

v2:
 * Only evaluate singlestep on X86EMUL_OKAY, but do so after X86EMUL_DONE has
   been adjusted to X86EMUL_OKAY.
 * Adjust comments in light of X86EMUL_DONE not getting back to callers.
---
 xen/arch/x86/x86_emulate/x86_emulate.c | 20 +++++++++++++-------
 xen/arch/x86/x86_emulate/x86_emulate.h | 14 +++++++++++---
 2 files changed, 24 insertions(+), 10 deletions(-)

Comments

Andrew Cooper Sept. 18, 2023, 9:24 a.m. UTC | #1
On 15/09/2023 9:36 pm, Andrew Cooper wrote:
> diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
> index 94caec1d142c..de7f99500e3f 100644
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -8379,13 +8379,6 @@ x86_emulate(
>      if ( !mode_64bit() )
>          _regs.r(ip) = (uint32_t)_regs.r(ip);
>  
> -    /* Should a singlestep #DB be raised? */
> -    if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss )
> -    {
> -        ctxt->retire.singlestep = true;
> -        ctxt->retire.sti = false;
> -    }
> -
>      if ( rc != X86EMUL_DONE )
>          *ctxt->regs = _regs;
>      else
> @@ -8394,6 +8387,19 @@ x86_emulate(
>          rc = X86EMUL_OKAY;
>      }
>  
> +    /* Should a singlestep #DB be raised? */
> +    if ( rc == X86EMUL_OKAY && singlestep )
> +    {
> +        ctxt->retire.pending_dbg |= X86_DR6_BS;
> +
> +        /* BROKEN - TODO, merge into pending_dbg. */
> +        if ( !ctxt->retire.mov_ss )
> +        {
> +            ctxt->retire.singlestep = true;
> +            ctxt->retire.sti = false;
> +        }

I occurs to me that setting X86_DR6_BS outside of the !mov_ss case will
break HVM (when HVM swaps from singlestep to pending_dbg) until one of
the further TODOs is addressed.

This will need bringing back within the conditional to avoid regressions
in the short term.

~Andrew
Jan Beulich Sept. 18, 2023, 11:28 a.m. UTC | #2
On 15.09.2023 22:36, Andrew Cooper wrote:
> Lots of this is very very broken, but we need to start somewhere.
> 
> First, the bugfix.  Hooks which use X86EMUL_DONE to skip the general emulation
> still need to evaluate singlestep as part of completing the instruction.
> Defer the logic until X86EMUL_DONE has been converted to X86EMUL_OKAY.

Doesn't this warrant a Fixes: tag against 4999bf3e8b4c?

Jan
Jan Beulich Sept. 18, 2023, 11:30 a.m. UTC | #3
On 18.09.2023 11:24, Andrew Cooper wrote:
> On 15/09/2023 9:36 pm, Andrew Cooper wrote:
>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>> @@ -8379,13 +8379,6 @@ x86_emulate(
>>      if ( !mode_64bit() )
>>          _regs.r(ip) = (uint32_t)_regs.r(ip);
>>  
>> -    /* Should a singlestep #DB be raised? */
>> -    if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss )
>> -    {
>> -        ctxt->retire.singlestep = true;
>> -        ctxt->retire.sti = false;
>> -    }
>> -
>>      if ( rc != X86EMUL_DONE )
>>          *ctxt->regs = _regs;
>>      else
>> @@ -8394,6 +8387,19 @@ x86_emulate(
>>          rc = X86EMUL_OKAY;
>>      }
>>  
>> +    /* Should a singlestep #DB be raised? */
>> +    if ( rc == X86EMUL_OKAY && singlestep )
>> +    {
>> +        ctxt->retire.pending_dbg |= X86_DR6_BS;
>> +
>> +        /* BROKEN - TODO, merge into pending_dbg. */
>> +        if ( !ctxt->retire.mov_ss )
>> +        {
>> +            ctxt->retire.singlestep = true;
>> +            ctxt->retire.sti = false;
>> +        }
> 
> I occurs to me that setting X86_DR6_BS outside of the !mov_ss case will
> break HVM (when HVM swaps from singlestep to pending_dbg) until one of
> the further TODOs is addressed.
> 
> This will need bringing back within the conditional to avoid regressions
> in the short term.

I'm afraid I don't understand this: Isn't the purpose to latch state no
matter whether it'll be consumed right away, or only on the next insn?

Jan
Andrew Cooper Sept. 18, 2023, 12:19 p.m. UTC | #4
On 18/09/2023 12:30 pm, Jan Beulich wrote:
> On 18.09.2023 11:24, Andrew Cooper wrote:
>> On 15/09/2023 9:36 pm, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>>> @@ -8379,13 +8379,6 @@ x86_emulate(
>>>      if ( !mode_64bit() )
>>>          _regs.r(ip) = (uint32_t)_regs.r(ip);
>>>  
>>> -    /* Should a singlestep #DB be raised? */
>>> -    if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss )
>>> -    {
>>> -        ctxt->retire.singlestep = true;
>>> -        ctxt->retire.sti = false;
>>> -    }
>>> -
>>>      if ( rc != X86EMUL_DONE )
>>>          *ctxt->regs = _regs;
>>>      else
>>> @@ -8394,6 +8387,19 @@ x86_emulate(
>>>          rc = X86EMUL_OKAY;
>>>      }
>>>  
>>> +    /* Should a singlestep #DB be raised? */
>>> +    if ( rc == X86EMUL_OKAY && singlestep )
>>> +    {
>>> +        ctxt->retire.pending_dbg |= X86_DR6_BS;
>>> +
>>> +        /* BROKEN - TODO, merge into pending_dbg. */
>>> +        if ( !ctxt->retire.mov_ss )
>>> +        {
>>> +            ctxt->retire.singlestep = true;
>>> +            ctxt->retire.sti = false;
>>> +        }
>> I occurs to me that setting X86_DR6_BS outside of the !mov_ss case will
>> break HVM (when HVM swaps from singlestep to pending_dbg) until one of
>> the further TODOs is addressed.
>>
>> This will need bringing back within the conditional to avoid regressions
>> in the short term.
> I'm afraid I don't understand this: Isn't the purpose to latch state no
> matter whether it'll be consumed right away, or only on the next insn?

Yes, that is the intention in the longterm.

But in the short term, where I'm doing just enough to fix the %dr6 bits,
putting this unconditionally in PENDING_DBG will break the emulation of
mov-to-ss until the bigger todo of "wire INTERRUPTIBILITY/ACTIVITY state
into the emulation context" is complete.

The latter is definitely too big to fit into 4.18, and I can't
intentionally regress mov-to-ss in a series we intend to backport.

~Andrew
Jan Beulich Sept. 18, 2023, 12:47 p.m. UTC | #5
On 18.09.2023 14:19, Andrew Cooper wrote:
> On 18/09/2023 12:30 pm, Jan Beulich wrote:
>> On 18.09.2023 11:24, Andrew Cooper wrote:
>>> On 15/09/2023 9:36 pm, Andrew Cooper wrote:
>>>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>>>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>>>> @@ -8379,13 +8379,6 @@ x86_emulate(
>>>>      if ( !mode_64bit() )
>>>>          _regs.r(ip) = (uint32_t)_regs.r(ip);
>>>>  
>>>> -    /* Should a singlestep #DB be raised? */
>>>> -    if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss )
>>>> -    {
>>>> -        ctxt->retire.singlestep = true;
>>>> -        ctxt->retire.sti = false;
>>>> -    }
>>>> -
>>>>      if ( rc != X86EMUL_DONE )
>>>>          *ctxt->regs = _regs;
>>>>      else
>>>> @@ -8394,6 +8387,19 @@ x86_emulate(
>>>>          rc = X86EMUL_OKAY;
>>>>      }
>>>>  
>>>> +    /* Should a singlestep #DB be raised? */
>>>> +    if ( rc == X86EMUL_OKAY && singlestep )
>>>> +    {
>>>> +        ctxt->retire.pending_dbg |= X86_DR6_BS;
>>>> +
>>>> +        /* BROKEN - TODO, merge into pending_dbg. */
>>>> +        if ( !ctxt->retire.mov_ss )
>>>> +        {
>>>> +            ctxt->retire.singlestep = true;
>>>> +            ctxt->retire.sti = false;
>>>> +        }
>>> I occurs to me that setting X86_DR6_BS outside of the !mov_ss case will
>>> break HVM (when HVM swaps from singlestep to pending_dbg) until one of
>>> the further TODOs is addressed.
>>>
>>> This will need bringing back within the conditional to avoid regressions
>>> in the short term.
>> I'm afraid I don't understand this: Isn't the purpose to latch state no
>> matter whether it'll be consumed right away, or only on the next insn?
> 
> Yes, that is the intention in the longterm.
> 
> But in the short term, where I'm doing just enough to fix the %dr6 bits,
> putting this unconditionally in PENDING_DBG will break the emulation of
> mov-to-ss until the bigger todo of "wire INTERRUPTIBILITY/ACTIVITY state
> into the emulation context" is complete.

Since I assume we're talking about the tail of _hvm_emulate_one(), my
problem is that I cannot see how setting X86_DR6_BS would lead to a
problem there. Plus you don't touch x86/hvm/ at all in the series, and
the pending_dbg field gets newly introduced in the patch here. Hence it
looks to me as if for HVM the field could take any value, without
breaking the code. But then, from you explicitly pointing out a problem,
I can only infer that I'm overlooking something.

> The latter is definitely too big to fit into 4.18, and I can't
> intentionally regress mov-to-ss in a series we intend to backport.

Of course.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 94caec1d142c..de7f99500e3f 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -8379,13 +8379,6 @@  x86_emulate(
     if ( !mode_64bit() )
         _regs.r(ip) = (uint32_t)_regs.r(ip);
 
-    /* Should a singlestep #DB be raised? */
-    if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss )
-    {
-        ctxt->retire.singlestep = true;
-        ctxt->retire.sti = false;
-    }
-
     if ( rc != X86EMUL_DONE )
         *ctxt->regs = _regs;
     else
@@ -8394,6 +8387,19 @@  x86_emulate(
         rc = X86EMUL_OKAY;
     }
 
+    /* Should a singlestep #DB be raised? */
+    if ( rc == X86EMUL_OKAY && singlestep )
+    {
+        ctxt->retire.pending_dbg |= X86_DR6_BS;
+
+        /* BROKEN - TODO, merge into pending_dbg. */
+        if ( !ctxt->retire.mov_ss )
+        {
+            ctxt->retire.singlestep = true;
+            ctxt->retire.sti = false;
+        }
+    }
+
     ctxt->regs->eflags &= ~X86_EFLAGS_RF;
 
  done:
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index 698750267a90..fbc023c37e34 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -588,15 +588,23 @@  struct x86_emulate_ctxt
     /* Canonical opcode (see below) (valid only on X86EMUL_OKAY). */
     unsigned int opcode;
 
-    /* Retirement state, set by the emulator (valid only on X86EMUL_OKAY). */
+    /*
+     * Retirement state, set by the emulator (valid only on X86EMUL_OKAY).
+     *
+     * TODO: all this state should be input/output from the VMCS PENDING_DBG,
+     * INTERRUPTIBILITY and ACTIVITIY fields.
+     */
     union {
-        uint8_t raw;
+        unsigned long raw;
         struct {
+            /* Accumulated %dr6 trap bits, positive polarity. */
+            unsigned int pending_dbg;
+
             bool hlt:1;          /* Instruction HLTed. */
             bool mov_ss:1;       /* Instruction sets MOV-SS irq shadow. */
             bool sti:1;          /* Instruction sets STI irq shadow. */
             bool unblock_nmi:1;  /* Instruction clears NMI blocking. */
-            bool singlestep:1;   /* Singlestepping was active. */
+            bool singlestep:1;   /* Singlestepping was active. (TODO, merge into pending_dbg) */
         };
     } retire;