diff mbox

[v4,8/8] x86/vm_event: Add HVM debug exception vm_events

Message ID 1464561430-13465-8-git-send-email-tamas@tklengyel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tamas K Lengyel May 29, 2016, 10:37 p.m. UTC
Since in-guest debug exceptions are now unconditionally trapped to Xen, adding
a hook for vm_event subscribers to tap into this new always-on guest event. We
rename along the way hvm_event_breakpoint_type to hvm_event_type to better
match the various events that can be passed with it. We also introduce the
necessary monitor_op domctl's to enable subscribing to the events.

Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
---
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Razvan Cojocaru <rcojocaru@bitdefender.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>

v3: Increment vm_event interface version
v2: Rename hvm_monitor_event to hvm_monitor_debug
---
 tools/libxc/include/xenctrl.h       |  3 +-
 tools/libxc/xc_monitor.c            | 15 +++++++++
 tools/tests/xen-access/xen-access.c | 61 ++++++++++++++++++++++++++++++++-----
 xen/arch/x86/hvm/monitor.c          | 26 ++++++++++++++--
 xen/arch/x86/hvm/vmx/vmx.c          | 26 +++++++++++++---
 xen/arch/x86/monitor.c              | 16 ++++++++++
 xen/include/asm-x86/domain.h        |  2 ++
 xen/include/asm-x86/hvm/monitor.h   |  7 +++--
 xen/include/asm-x86/monitor.h       |  3 +-
 xen/include/public/domctl.h         |  6 ++++
 xen/include/public/vm_event.h       | 14 +++++++--
 11 files changed, 157 insertions(+), 22 deletions(-)

Comments

Razvan Cojocaru May 30, 2016, 7:29 a.m. UTC | #1
On 05/30/2016 01:37 AM, Tamas K Lengyel wrote:
> Since in-guest debug exceptions are now unconditionally trapped to Xen, adding
> a hook for vm_event subscribers to tap into this new always-on guest event. We
> rename along the way hvm_event_breakpoint_type to hvm_event_type to better
> match the various events that can be passed with it. We also introduce the
> necessary monitor_op domctl's to enable subscribing to the events.
> 
> Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
> ---
> Cc: Ian Jackson <ian.jackson@eu.citrix.com>
> Cc: Wei Liu <wei.liu2@citrix.com>
> Cc: Razvan Cojocaru <rcojocaru@bitdefender.com>
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Jun Nakajima <jun.nakajima@intel.com>
> Cc: Kevin Tian <kevin.tian@intel.com>
> 
> v3: Increment vm_event interface version
> v2: Rename hvm_monitor_event to hvm_monitor_debug
> ---
>  tools/libxc/include/xenctrl.h       |  3 +-
>  tools/libxc/xc_monitor.c            | 15 +++++++++
>  tools/tests/xen-access/xen-access.c | 61 ++++++++++++++++++++++++++++++++-----
>  xen/arch/x86/hvm/monitor.c          | 26 ++++++++++++++--
>  xen/arch/x86/hvm/vmx/vmx.c          | 26 +++++++++++++---
>  xen/arch/x86/monitor.c              | 16 ++++++++++
>  xen/include/asm-x86/domain.h        |  2 ++
>  xen/include/asm-x86/hvm/monitor.h   |  7 +++--
>  xen/include/asm-x86/monitor.h       |  3 +-
>  xen/include/public/domctl.h         |  6 ++++
>  xen/include/public/vm_event.h       | 14 +++++++--
>  11 files changed, 157 insertions(+), 22 deletions(-)

Acked-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Jan Beulich May 30, 2016, 2:16 p.m. UTC | #2
>>> On 30.05.16 at 00:37, <tamas@tklengyel.com> wrote:
> @@ -117,7 +133,11 @@ int hvm_monitor_breakpoint(unsigned long rip,
>  
>      req.vcpu_id = curr->vcpu_id;
>  
> -    return vm_event_monitor_traps(curr, 1, &req);
> +    rc = vm_event_monitor_traps(curr, sync, &req);
> +    if ( type == HVM_MONITOR_DEBUG_EXCEPTION && rc == 1 && !sync )
> +        rc = 0;
> +
> +    return rc;
>  }

To someone like me, not intimately familiar with the code, this added
logic looks pretty arbitrary. Please add a comment indicating why
under these special circumstances rc needs to be altered here, which
then will hopefully also clarify why that can't be done right in
vm_event_monitor_traps().

> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -3377,9 +3377,25 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>              HVMTRACE_1D(TRAP_DEBUG, exit_qualification);
>              write_debugreg(6, exit_qualification | DR_STATUS_RESERVED_ONE);
>              if ( !v->domain->debugger_attached )
> -                vmx_propagate_intr(intr_info);
> +            {
> +                unsigned long insn_length = 0;
> +                int handled;

The variable name suggests it wants to be bool_t, but ...

> +                unsigned long trap_type = MASK_EXTR(intr_info,
> +                                                    INTR_INFO_INTR_TYPE_MASK);
> +
> +                if( trap_type >= X86_EVENTTYPE_SW_INTERRUPT )
> +                    __vmread(VM_EXIT_INSTRUCTION_LEN, &insn_length);
> +
> +                handled = hvm_monitor_debug(regs->eip,
> +                                            HVM_MONITOR_DEBUG_EXCEPTION,
> +                                            trap_type, insn_length);
> +                if ( handled <= 0 )

... it clearly can't. Please use a better name (could by just "rc" or
"ret"). (Otoh I see that code you modify further down uses that
same naming for a similar purpose variable. Let's see what the VMX
maintainers say.)

> +                    vmx_propagate_intr(intr_info);
> +
> +            }
>              else
>                  domain_pause_for_debugger();
> +
>              break;
>          case TRAP_int3: 

If anywhere, this added blank line wants to go after the break.

> @@ -3393,8 +3409,9 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>              }
>              else {
>                  int handled =
> -                    hvm_monitor_breakpoint(regs->eip,
> -                                           HVM_MONITOR_SOFTWARE_BREAKPOINT);
> +                        hvm_monitor_debug(regs->eip,
> +                                          HVM_MONITOR_SOFTWARE_BREAKPOINT,
> +                                          X86_EVENTTYPE_SW_EXCEPTION, 1);

Please let's not add further mistakes like this, assuming INT3 can't
have any prefixes. It can, even if they're useless.

> @@ -3721,8 +3738,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>          vmx_update_cpu_exec_control(v);
>          if ( v->arch.hvm_vcpu.single_step )
>          {
> -            hvm_monitor_breakpoint(regs->eip,
> -                                   HVM_MONITOR_SINGLESTEP_BREAKPOINT);
> +            hvm_monitor_debug(regs->eip, HVM_MONITOR_SINGLESTEP_BREAKPOINT, 0, 0);

How come the 3rd argument is literal zero here?

Also you're creating a long line here.

Jan
Tamas K Lengyel May 30, 2016, 8:13 p.m. UTC | #3
On Mon, May 30, 2016 at 8:16 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 30.05.16 at 00:37, <tamas@tklengyel.com> wrote:
>> @@ -117,7 +133,11 @@ int hvm_monitor_breakpoint(unsigned long rip,
>>
>>      req.vcpu_id = curr->vcpu_id;
>>
>> -    return vm_event_monitor_traps(curr, 1, &req);
>> +    rc = vm_event_monitor_traps(curr, sync, &req);
>> +    if ( type == HVM_MONITOR_DEBUG_EXCEPTION && rc == 1 && !sync )
>> +        rc = 0;
>> +
>> +    return rc;
>>  }
>
> To someone like me, not intimately familiar with the code, this added
> logic looks pretty arbitrary. Please add a comment indicating why
> under these special circumstances rc needs to be altered here, which
> then will hopefully also clarify why that can't be done right in
> vm_event_monitor_traps().

Yea, vm_event_monitor_traps is not the most straight forward function
in regards of what the return value means. I'll see if I can clean it
up a bit in another patch.

>
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -3377,9 +3377,25 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>>              HVMTRACE_1D(TRAP_DEBUG, exit_qualification);
>>              write_debugreg(6, exit_qualification | DR_STATUS_RESERVED_ONE);
>>              if ( !v->domain->debugger_attached )
>> -                vmx_propagate_intr(intr_info);
>> +            {
>> +                unsigned long insn_length = 0;
>> +                int handled;
>
> The variable name suggests it wants to be bool_t, but ...
>
>> +                unsigned long trap_type = MASK_EXTR(intr_info,
>> +                                                    INTR_INFO_INTR_TYPE_MASK);
>> +
>> +                if( trap_type >= X86_EVENTTYPE_SW_INTERRUPT )
>> +                    __vmread(VM_EXIT_INSTRUCTION_LEN, &insn_length);
>> +
>> +                handled = hvm_monitor_debug(regs->eip,
>> +                                            HVM_MONITOR_DEBUG_EXCEPTION,
>> +                                            trap_type, insn_length);
>> +                if ( handled <= 0 )
>
> ... it clearly can't. Please use a better name (could by just "rc" or
> "ret"). (Otoh I see that code you modify further down uses that
> same naming for a similar purpose variable. Let's see what the VMX
> maintainers say.)
>
>> +                    vmx_propagate_intr(intr_info);
>> +
>> +            }
>>              else
>>                  domain_pause_for_debugger();
>> +
>>              break;
>>          case TRAP_int3:
>
> If anywhere, this added blank line wants to go after the break.
>
>> @@ -3393,8 +3409,9 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>>              }
>>              else {
>>                  int handled =
>> -                    hvm_monitor_breakpoint(regs->eip,
>> -                                           HVM_MONITOR_SOFTWARE_BREAKPOINT);
>> +                        hvm_monitor_debug(regs->eip,
>> +                                          HVM_MONITOR_SOFTWARE_BREAKPOINT,
>> +                                          X86_EVENTTYPE_SW_EXCEPTION, 1);
>
> Please let's not add further mistakes like this, assuming INT3 can't
> have any prefixes. It can, even if they're useless.

You mean the instruction length is not necessarily 1? Ultimately it
doesn't seem to matter because reinjecting it with xc_hvm_inject_trap
ignores this field. Instruction length is only required to be properly
set AFAICT for a subset of debug exceptions during reinjection.

>
>> @@ -3721,8 +3738,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>>          vmx_update_cpu_exec_control(v);
>>          if ( v->arch.hvm_vcpu.single_step )
>>          {
>> -            hvm_monitor_breakpoint(regs->eip,
>> -                                   HVM_MONITOR_SINGLESTEP_BREAKPOINT);
>> +            hvm_monitor_debug(regs->eip, HVM_MONITOR_SINGLESTEP_BREAKPOINT, 0, 0);
>
> How come the 3rd argument is literal zero here?

Instruction length is only meaningful for a subset of debug exceptions
that could be reinjected to the guest using xc_hvm_inject_trap.
Monitor trap flag events are external to the guest so there is nothing
to inject. The instruction length field won't even exist for this type
of singlestep events (we distinguish vm_event_singlestep and
vm_event_debug structs for this reason), so 0 here is arbitrary. We
could set it to ~0 to make it more obvious that it's just a
placeholder in this context.

> Also you're creating a long line here.

Ack.

Thanks,
Tamas
Andrew Cooper May 30, 2016, 8:58 p.m. UTC | #4
>>> @@ -3393,8 +3409,9 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>>>              }
>>>              else {
>>>                  int handled =
>>> -                    hvm_monitor_breakpoint(regs->eip,
>>> -                                           HVM_MONITOR_SOFTWARE_BREAKPOINT);
>>> +                        hvm_monitor_debug(regs->eip,
>>> +                                          HVM_MONITOR_SOFTWARE_BREAKPOINT,
>>> +                                          X86_EVENTTYPE_SW_EXCEPTION, 1);
>> Please let's not add further mistakes like this, assuming INT3 can't
>> have any prefixes. It can, even if they're useless.
> You mean the instruction length is not necessarily 1? Ultimately it
> doesn't seem to matter because reinjecting it with xc_hvm_inject_trap
> ignores this field. Instruction length is only required to be properly
> set AFAICT for a subset of debug exceptions during reinjection.

Almost all x86 instructions can have redundant prefixes which change
their length without altering their functionality.

This specific area was the subject of XSA-106, and is astoundingly fragile.

Luckily, I have an available functional test which confirms correct
behaviour from the point of view of the guest.

http://xenbits.xen.org/people/andrewcoop/xen-test-framework/test-swint-emulation.html

Please confirm that this test returns success even when being monitored
with this new functionality.

~Andrew
Jan Beulich May 31, 2016, 7:59 a.m. UTC | #5
>>> On 30.05.16 at 22:13, <tamas@tklengyel.com> wrote:
> On Mon, May 30, 2016 at 8:16 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 30.05.16 at 00:37, <tamas@tklengyel.com> wrote:
>>> @@ -3393,8 +3409,9 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>>>              }
>>>              else {
>>>                  int handled =
>>> -                    hvm_monitor_breakpoint(regs->eip,
>>> -                                           HVM_MONITOR_SOFTWARE_BREAKPOINT);
>>> +                        hvm_monitor_debug(regs->eip,
>>> +                                          HVM_MONITOR_SOFTWARE_BREAKPOINT,
>>> +                                          X86_EVENTTYPE_SW_EXCEPTION, 1);
>>
>> Please let's not add further mistakes like this, assuming INT3 can't
>> have any prefixes. It can, even if they're useless.
> 
> You mean the instruction length is not necessarily 1? Ultimately it
> doesn't seem to matter because reinjecting it with xc_hvm_inject_trap
> ignores this field. Instruction length is only required to be properly
> set AFAICT for a subset of debug exceptions during reinjection.

As you suggest later in your reply, if the insn length really doesn't
matter, this should be made recognizable here. Either by a suitably
named manifest constant (which could then even evaluate to zero),
or by a comment (personally I'd prefer the former, but I'm not
maintainer of this code).

Jan
Tamas K Lengyel June 1, 2016, 9:46 p.m. UTC | #6
On Tue, May 31, 2016 at 1:59 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 30.05.16 at 22:13, <tamas@tklengyel.com> wrote:
>> On Mon, May 30, 2016 at 8:16 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>> On 30.05.16 at 00:37, <tamas@tklengyel.com> wrote:
>>>> @@ -3393,8 +3409,9 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>>>>              }
>>>>              else {
>>>>                  int handled =
>>>> -                    hvm_monitor_breakpoint(regs->eip,
>>>> -                                           HVM_MONITOR_SOFTWARE_BREAKPOINT);
>>>> +                        hvm_monitor_debug(regs->eip,
>>>> +                                          HVM_MONITOR_SOFTWARE_BREAKPOINT,
>>>> +                                          X86_EVENTTYPE_SW_EXCEPTION, 1);
>>>
>>> Please let's not add further mistakes like this, assuming INT3 can't
>>> have any prefixes. It can, even if they're useless.
>>
>> You mean the instruction length is not necessarily 1? Ultimately it
>> doesn't seem to matter because reinjecting it with xc_hvm_inject_trap
>> ignores this field. Instruction length is only required to be properly
>> set AFAICT for a subset of debug exceptions during reinjection.
>
> As you suggest later in your reply, if the insn length really doesn't
> matter, this should be made recognizable here. Either by a suitably
> named manifest constant (which could then even evaluate to zero),
> or by a comment (personally I'd prefer the former, but I'm not
> maintainer of this code).
>
> Jan


Running Andrew's framework with xen-access monitoring breakpoints results in

xen-access:
Got event from Xen
Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)

xl dmesg:
(d28) --- Xen Test Framework ---
(d28) Environment: HVM 64bit (Long mode 4 levels)
(d28) Trap emulation
(d28) Warning: FEP support not detected - some tests will be skipped
(d28) Test cpl0: all perms ok
(d28)   Testing int3
(XEN) d28v0 VMRESUME error: 0x7
(XEN) domain_crash_sync called from vmcs.c:1599
(XEN) Domain 28 (vcpu#0) crashed on cpu#7:
(XEN) ----[ Xen-4.6.1  x86_64  debug=n  Not tainted ]----
(XEN) CPU:    7
(XEN) RIP:    0008:[<00000000001032d1>]
(XEN) RFLAGS: 0000000000000046   CONTEXT: hvm guest (d28v0)
(XEN) rax: 00000000001032d2   rbx: 00000000001102b0   rcx: 0000000000000000
(XEN) rdx: 0000000000104af0   rsi: 0000000000000000   rdi: 0000000000000000
(XEN) rbp: 0000000000000001   rsp: 0000000000114f98   r8:  000000000000000f
(XEN) r9:  00000000000000ad   r10: 000000000000000f   r11: 0000000000000004
(XEN) r12: 0000000000000003   r13: 0000000000000000   r14: 0000000000000000
(XEN) r15: 0000000000000000   cr0: 0000000080000011   cr4: 0000000000000020
(XEN) cr3: 000000000010b000   cr2: 0000000000000000
(XEN) ds: 0033   es: 0033   fs: 0033   gs: 0033   ss: 0000   cs: 0008

This is likely because xen-access sets the instruction length to 0
during reinjection. If I change that to 1 the tests still fail but
without crashing the domain, output:

xen-access:
Got event from Xen
Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032e2, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032e2, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
Got event from Xen
Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)

xl dmesg:
(d30) Environment: HVM 64bit (Long mode 4 levels)
(d30) Trap emulation
(d30) Warning: FEP support not detected - some tests will be skipped
(d30) Test cpl0: all perms ok
(d30)   Testing int3
(d30) Fail redundant: Expected 1 exception (vec 3 at 00000000001032e3), got 2
(d30)  exlog[00] 0008:00000000001032e2 vec 3[0000]
(d30)  exlog[01] 0008:00000000001032e3 vec 3[0000]
(d30)   Testing int $3
(d30)   Testing icebp
(d30)   Testing int $1
(d30)   Testing into
(d30) Test cpl0: p=0
(d30)   Testing int3
(d30)   Testing int $3
(d30)   Testing icebp
(d30)   Testing int $1
(d30)   Testing into
(d30) Test cpl3: all perms ok
(d30)   Testing int3
(d30) Fail redundant: Expected 1 exception (vec 3 at 00000000001032e3), got 2
(d30)  exlog[00] 0023:00000000001032e2 vec 3[0000]
(d30)  exlog[01] 0023:00000000001032e3 vec 3[0000]
(d30)   Testing int $3
(d30)   Testing icebp
(d30)   Testing int $1
(d30)   Testing into
(d30) Test cpl3: p=0
(d30)   Testing int3
(d30)   Testing int $3
(d30)   Testing icebp
(d30)   Testing int $1
(d30)   Testing into
(d30) Test cpl3: dpl=0
(d30)   Testing int3
(d30)   Testing int $3
(d30)   Testing icebp
(d30)   Testing int $1
(d30)   Testing into
(d30) Test result: FAILURE

So we _should be_ sending the instruction length information along for
this type of vm_events and it is in fact buggy right now.

Tamas
Andrew Cooper June 1, 2016, 10:17 p.m. UTC | #7
On 01/06/2016 22:46, Tamas K Lengyel wrote:
> On Tue, May 31, 2016 at 1:59 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 30.05.16 at 22:13, <tamas@tklengyel.com> wrote:
>>> On Mon, May 30, 2016 at 8:16 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>> On 30.05.16 at 00:37, <tamas@tklengyel.com> wrote:
>>>>> @@ -3393,8 +3409,9 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>>>>>              }
>>>>>              else {
>>>>>                  int handled =
>>>>> -                    hvm_monitor_breakpoint(regs->eip,
>>>>> -                                           HVM_MONITOR_SOFTWARE_BREAKPOINT);
>>>>> +                        hvm_monitor_debug(regs->eip,
>>>>> +                                          HVM_MONITOR_SOFTWARE_BREAKPOINT,
>>>>> +                                          X86_EVENTTYPE_SW_EXCEPTION, 1);
>>>> Please let's not add further mistakes like this, assuming INT3 can't
>>>> have any prefixes. It can, even if they're useless.
>>> You mean the instruction length is not necessarily 1? Ultimately it
>>> doesn't seem to matter because reinjecting it with xc_hvm_inject_trap
>>> ignores this field. Instruction length is only required to be properly
>>> set AFAICT for a subset of debug exceptions during reinjection.
>> As you suggest later in your reply, if the insn length really doesn't
>> matter, this should be made recognizable here. Either by a suitably
>> named manifest constant (which could then even evaluate to zero),
>> or by a comment (personally I'd prefer the former, but I'm not
>> maintainer of this code).
>>
>> Jan
>
> Running Andrew's framework with xen-access monitoring breakpoints results in
>
> xen-access:
> Got event from Xen
> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
>
> xl dmesg:
> (d28) --- Xen Test Framework ---
> (d28) Environment: HVM 64bit (Long mode 4 levels)
> (d28) Trap emulation
> (d28) Warning: FEP support not detected - some tests will be skipped
> (d28) Test cpl0: all perms ok
> (d28)   Testing int3
> (XEN) d28v0 VMRESUME error: 0x7
> (XEN) domain_crash_sync called from vmcs.c:1599
> (XEN) Domain 28 (vcpu#0) crashed on cpu#7:
> (XEN) ----[ Xen-4.6.1  x86_64  debug=n  Not tainted ]----
> (XEN) CPU:    7
> (XEN) RIP:    0008:[<00000000001032d1>]
> (XEN) RFLAGS: 0000000000000046   CONTEXT: hvm guest (d28v0)
> (XEN) rax: 00000000001032d2   rbx: 00000000001102b0   rcx: 0000000000000000
> (XEN) rdx: 0000000000104af0   rsi: 0000000000000000   rdi: 0000000000000000
> (XEN) rbp: 0000000000000001   rsp: 0000000000114f98   r8:  000000000000000f
> (XEN) r9:  00000000000000ad   r10: 000000000000000f   r11: 0000000000000004
> (XEN) r12: 0000000000000003   r13: 0000000000000000   r14: 0000000000000000
> (XEN) r15: 0000000000000000   cr0: 0000000080000011   cr4: 0000000000000020
> (XEN) cr3: 000000000010b000   cr2: 0000000000000000
> (XEN) ds: 0033   es: 0033   fs: 0033   gs: 0033   ss: 0000   cs: 0008
>
> This is likely because xen-access sets the instruction length to 0
> during reinjection. If I change that to 1 the tests still fail but
> without crashing the domain, output:
>
> xen-access:
> Got event from Xen
> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032e2, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032e2, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
> Got event from Xen
> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
>
> xl dmesg:
> (d30) Environment: HVM 64bit (Long mode 4 levels)
> (d30) Trap emulation
> (d30) Warning: FEP support not detected - some tests will be skipped
> (d30) Test cpl0: all perms ok
> (d30)   Testing int3
> (d30) Fail redundant: Expected 1 exception (vec 3 at 00000000001032e3), got 2
> (d30)  exlog[00] 0008:00000000001032e2 vec 3[0000]
> (d30)  exlog[01] 0008:00000000001032e3 vec 3[0000]
> (d30)   Testing int $3
> (d30)   Testing icebp
> (d30)   Testing int $1
> (d30)   Testing into
> (d30) Test cpl0: p=0
> (d30)   Testing int3
> (d30)   Testing int $3
> (d30)   Testing icebp
> (d30)   Testing int $1
> (d30)   Testing into
> (d30) Test cpl3: all perms ok
> (d30)   Testing int3
> (d30) Fail redundant: Expected 1 exception (vec 3 at 00000000001032e3), got 2
> (d30)  exlog[00] 0023:00000000001032e2 vec 3[0000]
> (d30)  exlog[01] 0023:00000000001032e3 vec 3[0000]
> (d30)   Testing int $3
> (d30)   Testing icebp
> (d30)   Testing int $1
> (d30)   Testing into
> (d30) Test cpl3: p=0
> (d30)   Testing int3
> (d30)   Testing int $3
> (d30)   Testing icebp
> (d30)   Testing int $1
> (d30)   Testing into
> (d30) Test cpl3: dpl=0
> (d30)   Testing int3
> (d30)   Testing int $3
> (d30)   Testing icebp
> (d30)   Testing int $1
> (d30)   Testing into
> (d30) Test result: FAILURE
>
> So we _should be_ sending the instruction length information along for
> this type of vm_events and it is in fact buggy right now.

On a related note, do emulated instruction get appropriately sent for
introspection?

You can check this by booting a debug hypervisor with "hvm_fep" on the
command line, which will double the number of tests run by this suite.

If they are sent for emulation, you would expect to see some "Fail force
redundant:" issues as well.  I can't think of any codepath offhand which
links the emulation results up to the current introspection paths.

~Andrew
Tamas K Lengyel June 2, 2016, 12:01 a.m. UTC | #8
On Wed, Jun 1, 2016 at 4:17 PM, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 01/06/2016 22:46, Tamas K Lengyel wrote:
>> On Tue, May 31, 2016 at 1:59 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>> On 30.05.16 at 22:13, <tamas@tklengyel.com> wrote:
>>>> On Mon, May 30, 2016 at 8:16 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>>>>> On 30.05.16 at 00:37, <tamas@tklengyel.com> wrote:
>>>>>> @@ -3393,8 +3409,9 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>>>>>>              }
>>>>>>              else {
>>>>>>                  int handled =
>>>>>> -                    hvm_monitor_breakpoint(regs->eip,
>>>>>> -                                           HVM_MONITOR_SOFTWARE_BREAKPOINT);
>>>>>> +                        hvm_monitor_debug(regs->eip,
>>>>>> +                                          HVM_MONITOR_SOFTWARE_BREAKPOINT,
>>>>>> +                                          X86_EVENTTYPE_SW_EXCEPTION, 1);
>>>>> Please let's not add further mistakes like this, assuming INT3 can't
>>>>> have any prefixes. It can, even if they're useless.
>>>> You mean the instruction length is not necessarily 1? Ultimately it
>>>> doesn't seem to matter because reinjecting it with xc_hvm_inject_trap
>>>> ignores this field. Instruction length is only required to be properly
>>>> set AFAICT for a subset of debug exceptions during reinjection.
>>> As you suggest later in your reply, if the insn length really doesn't
>>> matter, this should be made recognizable here. Either by a suitably
>>> named manifest constant (which could then even evaluate to zero),
>>> or by a comment (personally I'd prefer the former, but I'm not
>>> maintainer of this code).
>>>
>>> Jan
>>
>> Running Andrew's framework with xen-access monitoring breakpoints results in
>>
>> xen-access:
>> Got event from Xen
>> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
>>
>> xl dmesg:
>> (d28) --- Xen Test Framework ---
>> (d28) Environment: HVM 64bit (Long mode 4 levels)
>> (d28) Trap emulation
>> (d28) Warning: FEP support not detected - some tests will be skipped
>> (d28) Test cpl0: all perms ok
>> (d28)   Testing int3
>> (XEN) d28v0 VMRESUME error: 0x7
>> (XEN) domain_crash_sync called from vmcs.c:1599
>> (XEN) Domain 28 (vcpu#0) crashed on cpu#7:
>> (XEN) ----[ Xen-4.6.1  x86_64  debug=n  Not tainted ]----
>> (XEN) CPU:    7
>> (XEN) RIP:    0008:[<00000000001032d1>]
>> (XEN) RFLAGS: 0000000000000046   CONTEXT: hvm guest (d28v0)
>> (XEN) rax: 00000000001032d2   rbx: 00000000001102b0   rcx: 0000000000000000
>> (XEN) rdx: 0000000000104af0   rsi: 0000000000000000   rdi: 0000000000000000
>> (XEN) rbp: 0000000000000001   rsp: 0000000000114f98   r8:  000000000000000f
>> (XEN) r9:  00000000000000ad   r10: 000000000000000f   r11: 0000000000000004
>> (XEN) r12: 0000000000000003   r13: 0000000000000000   r14: 0000000000000000
>> (XEN) r15: 0000000000000000   cr0: 0000000080000011   cr4: 0000000000000020
>> (XEN) cr3: 000000000010b000   cr2: 0000000000000000
>> (XEN) ds: 0033   es: 0033   fs: 0033   gs: 0033   ss: 0000   cs: 0008
>>
>> This is likely because xen-access sets the instruction length to 0
>> during reinjection. If I change that to 1 the tests still fail but
>> without crashing the domain, output:
>>
>> xen-access:
>> Got event from Xen
>> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032e2, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032e2, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032d1, gfn=103 (vcpu 0)
>> Got event from Xen
>> Breakpoint: rip=00000000001032e1, gfn=103 (vcpu 0)
>>
>> xl dmesg:
>> (d30) Environment: HVM 64bit (Long mode 4 levels)
>> (d30) Trap emulation
>> (d30) Warning: FEP support not detected - some tests will be skipped
>> (d30) Test cpl0: all perms ok
>> (d30)   Testing int3
>> (d30) Fail redundant: Expected 1 exception (vec 3 at 00000000001032e3), got 2
>> (d30)  exlog[00] 0008:00000000001032e2 vec 3[0000]
>> (d30)  exlog[01] 0008:00000000001032e3 vec 3[0000]
>> (d30)   Testing int $3
>> (d30)   Testing icebp
>> (d30)   Testing int $1
>> (d30)   Testing into
>> (d30) Test cpl0: p=0
>> (d30)   Testing int3
>> (d30)   Testing int $3
>> (d30)   Testing icebp
>> (d30)   Testing int $1
>> (d30)   Testing into
>> (d30) Test cpl3: all perms ok
>> (d30)   Testing int3
>> (d30) Fail redundant: Expected 1 exception (vec 3 at 00000000001032e3), got 2
>> (d30)  exlog[00] 0023:00000000001032e2 vec 3[0000]
>> (d30)  exlog[01] 0023:00000000001032e3 vec 3[0000]
>> (d30)   Testing int $3
>> (d30)   Testing icebp
>> (d30)   Testing int $1
>> (d30)   Testing into
>> (d30) Test cpl3: p=0
>> (d30)   Testing int3
>> (d30)   Testing int $3
>> (d30)   Testing icebp
>> (d30)   Testing int $1
>> (d30)   Testing into
>> (d30) Test cpl3: dpl=0
>> (d30)   Testing int3
>> (d30)   Testing int $3
>> (d30)   Testing icebp
>> (d30)   Testing int $1
>> (d30)   Testing into
>> (d30) Test result: FAILURE
>>
>> So we _should be_ sending the instruction length information along for
>> this type of vm_events and it is in fact buggy right now.
>
> On a related note, do emulated instruction get appropriately sent for
> introspection?

I'm fairly certain emulated int3 is not forwarded through vm_event as
the only hook we have right now for int3 comes from vmx.c.

>
> You can check this by booting a debug hypervisor with "hvm_fep" on the
> command line, which will double the number of tests run by this suite.
>
> If they are sent for emulation, you would expect to see some "Fail force
> redundant:" issues as well.  I can't think of any codepath offhand which
> links the emulation results up to the current introspection paths.

I'll try it out, thanks!

Tamas
diff mbox

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 3085130..29d983e 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2162,7 +2162,8 @@  int xc_monitor_guest_request(xc_interface *xch, domid_t domain_id,
                              bool enable, bool sync);
 int xc_monitor_privileged_call(xc_interface *xch, domid_t domain_id,
                                bool enable);
-
+int xc_monitor_debug_exceptions(xc_interface *xch, domid_t domain_id,
+                                bool enable, bool sync);
 /**
  * This function enables / disables emulation for each REP for a
  * REP-compatible instruction.
diff --git a/tools/libxc/xc_monitor.c b/tools/libxc/xc_monitor.c
index 1e4a9d2..b0bf708 100644
--- a/tools/libxc/xc_monitor.c
+++ b/tools/libxc/xc_monitor.c
@@ -171,6 +171,21 @@  int xc_monitor_privileged_call(xc_interface *xch, domid_t domain_id,
     return do_domctl(xch, &domctl);
 }
 
+int xc_monitor_debug_exceptions(xc_interface *xch, domid_t domain_id,
+                                bool enable, bool sync)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_monitor_op;
+    domctl.domain = domain_id;
+    domctl.u.monitor_op.op = enable ? XEN_DOMCTL_MONITOR_OP_ENABLE
+                                    : XEN_DOMCTL_MONITOR_OP_DISABLE;
+    domctl.u.monitor_op.event = XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION;
+    domctl.u.monitor_op.u.debug_exception.sync = sync;
+
+    return do_domctl(xch, &domctl);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/tests/xen-access/xen-access.c b/tools/tests/xen-access/xen-access.c
index 1b2b3fd..8678ccd 100644
--- a/tools/tests/xen-access/xen-access.c
+++ b/tools/tests/xen-access/xen-access.c
@@ -52,6 +52,10 @@ 
 #define ERROR(a, b...) fprintf(stderr, a "\n", ## b)
 #define PERROR(a, b...) fprintf(stderr, a ": %s\n", ## b, strerror(errno))
 
+/* From xen/include/asm-x86/processor.h */
+#define X86_TRAP_DEBUG  1
+#define X86_TRAP_INT3   3
+
 typedef struct vm_event {
     domid_t domain_id;
     xenevtchn_handle *xce_handle;
@@ -332,7 +336,7 @@  void usage(char* progname)
 {
     fprintf(stderr, "Usage: %s [-m] <domain_id> write|exec", progname);
 #if defined(__i386__) || defined(__x86_64__)
-            fprintf(stderr, "|breakpoint|altp2m_write|altp2m_exec");
+            fprintf(stderr, "|breakpoint|altp2m_write|altp2m_exec|debug");
 #elif defined(__arm__) || defined(__aarch64__)
             fprintf(stderr, "|privcall");
 #endif
@@ -355,11 +359,13 @@  int main(int argc, char *argv[])
     xc_interface *xch;
     xenmem_access_t default_access = XENMEM_access_rwx;
     xenmem_access_t after_first_access = XENMEM_access_rwx;
+    int memaccess = 0;
     int required = 0;
     int breakpoint = 0;
     int shutting_down = 0;
     int privcall = 0;
     int altp2m = 0;
+    int debug = 0;
     uint16_t altp2m_view_id = 0;
 
     char* progname = argv[0];
@@ -393,11 +399,13 @@  int main(int argc, char *argv[])
     {
         default_access = XENMEM_access_rx;
         after_first_access = XENMEM_access_rwx;
+        memaccess = 1;
     }
     else if ( !strcmp(argv[0], "exec") )
     {
         default_access = XENMEM_access_rw;
         after_first_access = XENMEM_access_rwx;
+        memaccess = 1;
     }
 #if defined(__i386__) || defined(__x86_64__)
     else if ( !strcmp(argv[0], "breakpoint") )
@@ -408,11 +416,17 @@  int main(int argc, char *argv[])
     {
         default_access = XENMEM_access_rx;
         altp2m = 1;
+        memaccess = 1;
     }
     else if ( !strcmp(argv[0], "altp2m_exec") )
     {
         default_access = XENMEM_access_rw;
         altp2m = 1;
+        memaccess = 1;
+    }
+    else if ( !strcmp(argv[0], "debug") )
+    {
+        debug = 1;
     }
 #elif defined(__arm__) || defined(__aarch64__)
     else if ( !strcmp(argv[0], "privcall") )
@@ -453,7 +467,7 @@  int main(int argc, char *argv[])
     }
 
     /* With altp2m we just create a new, restricted view of the memory */
-    if ( altp2m )
+    if ( memaccess && altp2m )
     {
         xen_pfn_t gfn = 0;
         unsigned long perm_set = 0;
@@ -500,7 +514,7 @@  int main(int argc, char *argv[])
         }
     }
 
-    if ( !altp2m )
+    if ( memaccess && !altp2m )
     {
         /* Set the default access type and convert all pages to it */
         rc = xc_set_mem_access(xch, domain_id, default_access, ~0ull, 0);
@@ -541,6 +555,16 @@  int main(int argc, char *argv[])
         }
     }
 
+    if ( debug )
+    {
+        rc = xc_monitor_debug_exceptions(xch, domain_id, 1, 1);
+        if ( rc < 0 )
+        {
+            ERROR("Error %d setting debug exception listener with vm_event\n", rc);
+            goto exit;
+        }
+    }
+
     /* Wait for access */
     for (;;)
     {
@@ -551,9 +575,10 @@  int main(int argc, char *argv[])
 
             if ( breakpoint )
                 rc = xc_monitor_software_breakpoint(xch, domain_id, 0);
-
             if ( privcall )
                 rc = xc_monitor_privileged_call(xch, domain_id, 0);
+            if ( debug )
+                rc = xc_monitor_debug_exceptions(xch, domain_id, 0, 0);
 
             if ( altp2m )
             {
@@ -661,9 +686,10 @@  int main(int argc, char *argv[])
                        req.vcpu_id);
 
                 /* Reinject */
-                rc = xc_hvm_inject_trap(
-                    xch, domain_id, req.vcpu_id, 3,
-                    HVMOP_TRAP_sw_exc, -1, 0, 0);
+                rc = xc_hvm_inject_trap(xch, domain_id, req.vcpu_id,
+                                        X86_TRAP_INT3,
+                                        req.u.software_breakpoint.type, -1,
+                                        req.u.software_breakpoint.insn_length, 0);
                 if (rc < 0)
                 {
                     ERROR("Error %d injecting breakpoint\n", rc);
@@ -711,6 +737,27 @@  int main(int argc, char *argv[])
                 rsp.flags |= VM_EVENT_FLAG_TOGGLE_SINGLESTEP;
 
                 break;
+            case VM_EVENT_REASON_DEBUG_EXCEPTION:
+                printf("Debug exception: rip=%016"PRIx64", vcpu %d. Type: %u. Length: %u\n",
+                       req.data.regs.x86.rip,
+                       req.vcpu_id,
+                       req.u.debug_exception.type,
+                       req.u.debug_exception.insn_length);
+
+                /* Reinject */
+                rc = xc_hvm_inject_trap(xch, domain_id, req.vcpu_id,
+                                        X86_TRAP_DEBUG,
+                                        req.u.debug_exception.type, -1,
+                                        req.u.debug_exception.insn_length,
+                                        req.data.regs.x86.cr2);
+                if (rc < 0)
+                {
+                    ERROR("Error %d injecting breakpoint\n", rc);
+                    interrupted = -1;
+                    continue;
+                }
+
+                break;
             default:
                 fprintf(stderr, "UNKNOWN REASON CODE %d\n", req.reason);
             }
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index 8b41b56..5e040ed 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -88,12 +88,14 @@  static inline unsigned long gfn_of_rip(unsigned long rip)
     return paging_gva_to_gfn(curr, sreg.base + rip, &pfec);
 }
 
-int hvm_monitor_breakpoint(unsigned long rip,
-                           enum hvm_monitor_breakpoint_type type)
+int hvm_monitor_debug(unsigned long rip, enum hvm_monitor_debug_type type,
+                      unsigned long trap_type, unsigned long insn_length)
 {
     struct vcpu *curr = current;
     struct arch_domain *ad = &curr->domain->arch;
     vm_event_request_t req = {};
+    bool_t sync;
+    int rc;
 
     switch ( type )
     {
@@ -102,6 +104,9 @@  int hvm_monitor_breakpoint(unsigned long rip,
             return 0;
         req.reason = VM_EVENT_REASON_SOFTWARE_BREAKPOINT;
         req.u.software_breakpoint.gfn = gfn_of_rip(rip);
+        req.u.software_breakpoint.type = trap_type;
+        req.u.software_breakpoint.insn_length = insn_length;
+        sync = 1;
         break;
 
     case HVM_MONITOR_SINGLESTEP_BREAKPOINT:
@@ -109,6 +114,17 @@  int hvm_monitor_breakpoint(unsigned long rip,
             return 0;
         req.reason = VM_EVENT_REASON_SINGLESTEP;
         req.u.singlestep.gfn = gfn_of_rip(rip);
+        sync = 1;
+        break;
+
+    case HVM_MONITOR_DEBUG_EXCEPTION:
+        if ( !ad->monitor.debug_exception_enabled )
+            return 0;
+        req.reason = VM_EVENT_REASON_DEBUG_EXCEPTION;
+        req.u.debug_exception.gfn = gfn_of_rip(rip);
+        req.u.debug_exception.type = trap_type;
+        req.u.debug_exception.insn_length = insn_length;
+        sync = !!ad->monitor.debug_exception_sync;
         break;
 
     default:
@@ -117,7 +133,11 @@  int hvm_monitor_breakpoint(unsigned long rip,
 
     req.vcpu_id = curr->vcpu_id;
 
-    return vm_event_monitor_traps(curr, 1, &req);
+    rc = vm_event_monitor_traps(curr, sync, &req);
+    if ( type == HVM_MONITOR_DEBUG_EXCEPTION && rc == 1 && !sync )
+        rc = 0;
+
+    return rc;
 }
 
 /*
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 081aef1..cea0761 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3377,9 +3377,25 @@  void vmx_vmexit_handler(struct cpu_user_regs *regs)
             HVMTRACE_1D(TRAP_DEBUG, exit_qualification);
             write_debugreg(6, exit_qualification | DR_STATUS_RESERVED_ONE);
             if ( !v->domain->debugger_attached )
-                vmx_propagate_intr(intr_info);
+            {
+                unsigned long insn_length = 0;
+                int handled;
+                unsigned long trap_type = MASK_EXTR(intr_info,
+                                                    INTR_INFO_INTR_TYPE_MASK);
+
+                if( trap_type >= X86_EVENTTYPE_SW_INTERRUPT )
+                    __vmread(VM_EXIT_INSTRUCTION_LEN, &insn_length);
+
+                handled = hvm_monitor_debug(regs->eip,
+                                            HVM_MONITOR_DEBUG_EXCEPTION,
+                                            trap_type, insn_length);
+                if ( handled <= 0 )
+                    vmx_propagate_intr(intr_info);
+
+            }
             else
                 domain_pause_for_debugger();
+
             break;
         case TRAP_int3: 
         {
@@ -3393,8 +3409,9 @@  void vmx_vmexit_handler(struct cpu_user_regs *regs)
             }
             else {
                 int handled =
-                    hvm_monitor_breakpoint(regs->eip,
-                                           HVM_MONITOR_SOFTWARE_BREAKPOINT);
+                        hvm_monitor_debug(regs->eip,
+                                          HVM_MONITOR_SOFTWARE_BREAKPOINT,
+                                          X86_EVENTTYPE_SW_EXCEPTION, 1);
 
                 if ( handled < 0 ) 
                 {
@@ -3721,8 +3738,7 @@  void vmx_vmexit_handler(struct cpu_user_regs *regs)
         vmx_update_cpu_exec_control(v);
         if ( v->arch.hvm_vcpu.single_step )
         {
-            hvm_monitor_breakpoint(regs->eip,
-                                   HVM_MONITOR_SINGLESTEP_BREAKPOINT);
+            hvm_monitor_debug(regs->eip, HVM_MONITOR_SINGLESTEP_BREAKPOINT, 0, 0);
             if ( v->domain->debugger_attached )
                 domain_pause_for_debugger();
         }
diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
index 621f91a..e8b79f6 100644
--- a/xen/arch/x86/monitor.c
+++ b/xen/arch/x86/monitor.c
@@ -124,6 +124,22 @@  int arch_monitor_domctl_event(struct domain *d,
         break;
     }
 
+    case XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION:
+    {
+        bool_t old_status = ad->monitor.debug_exception_enabled;
+
+        if ( unlikely(old_status == requested_status) )
+            return -EEXIST;
+
+        domain_pause(d);
+        ad->monitor.debug_exception_enabled = requested_status;
+        ad->monitor.debug_exception_sync = requested_status ?
+                                            mop->u.debug_exception.sync :
+                                            0;
+        domain_unpause(d);
+        break;
+    }
+
     default:
         /*
          * Should not be reached unless arch_monitor_get_capabilities() is
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 165e533..1cf97c3 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -405,6 +405,8 @@  struct arch_domain
         unsigned int mov_to_msr_extended         : 1;
         unsigned int singlestep_enabled          : 1;
         unsigned int software_breakpoint_enabled : 1;
+        unsigned int debug_exception_enabled     : 1;
+        unsigned int debug_exception_sync        : 1;
     } monitor;
 
     /* Mem_access emulation control */
diff --git a/xen/include/asm-x86/hvm/monitor.h b/xen/include/asm-x86/hvm/monitor.h
index 55d435e..8b0f119 100644
--- a/xen/include/asm-x86/hvm/monitor.h
+++ b/xen/include/asm-x86/hvm/monitor.h
@@ -23,10 +23,11 @@ 
 #include <xen/paging.h>
 #include <public/vm_event.h>
 
-enum hvm_monitor_breakpoint_type
+enum hvm_monitor_debug_type
 {
     HVM_MONITOR_SOFTWARE_BREAKPOINT,
     HVM_MONITOR_SINGLESTEP_BREAKPOINT,
+    HVM_MONITOR_DEBUG_EXCEPTION,
 };
 
 /*
@@ -39,8 +40,8 @@  bool_t hvm_monitor_cr(unsigned int index, unsigned long value,
 #define hvm_monitor_crX(cr, new, old) \
                         hvm_monitor_cr(VM_EVENT_X86_##cr, new, old)
 void hvm_monitor_msr(unsigned int msr, uint64_t value);
-int hvm_monitor_breakpoint(unsigned long rip,
-                           enum hvm_monitor_breakpoint_type type);
+int hvm_monitor_debug(unsigned long rip, enum hvm_monitor_debug_type type,
+                      unsigned long trap_type, unsigned long insn_length);
 
 #endif /* __ASM_X86_HVM_MONITOR_H__ */
 
diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
index 0fee750..74a08f0f 100644
--- a/xen/include/asm-x86/monitor.h
+++ b/xen/include/asm-x86/monitor.h
@@ -74,7 +74,8 @@  static inline uint32_t arch_monitor_get_capabilities(struct domain *d)
     capabilities = (1U << XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG) |
                    (1U << XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR) |
                    (1U << XEN_DOMCTL_MONITOR_EVENT_SOFTWARE_BREAKPOINT) |
-                   (1U << XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST);
+                   (1U << XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST) |
+                   (1U << XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION);
 
     /* Since we know this is on VMX, we can just call the hvm func */
     if ( hvm_is_singlestep_supported() )
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 35adce2..b69b1bb 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1081,6 +1081,7 @@  DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t);
 #define XEN_DOMCTL_MONITOR_EVENT_SOFTWARE_BREAKPOINT   3
 #define XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST         4
 #define XEN_DOMCTL_MONITOR_EVENT_PRIVILEGED_CALL       5
+#define XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION       6
 
 struct xen_domctl_monitor_op {
     uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
@@ -1116,6 +1117,11 @@  struct xen_domctl_monitor_op {
             /* Pause vCPU until response */
             uint8_t sync;
         } guest_request;
+
+        struct {
+            /* Pause vCPU until response */
+            uint8_t sync;
+        } debug_exception;
     } u;
 };
 typedef struct xen_domctl_monitor_op xen_domctl_monitor_op_t;
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index 6ff7cc0..13855b5 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -29,7 +29,7 @@ 
 
 #include "xen.h"
 
-#define VM_EVENT_INTERFACE_VERSION 0x00000001
+#define VM_EVENT_INTERFACE_VERSION 0x00000002
 
 #if defined(__XEN__) || defined(__XEN_TOOLS__)
 
@@ -121,6 +121,8 @@ 
 #define VM_EVENT_REASON_GUEST_REQUEST           8
 /* Privileged call executed (e.g. SMC) */
 #define VM_EVENT_REASON_PRIVILEGED_CALL         9
+/* A debug exception was caught */
+#define VM_EVENT_REASON_DEBUG_EXCEPTION         10
 
 /* Supported values for the vm_event_write_ctrlreg index. */
 #define VM_EVENT_X86_CR0    0
@@ -258,8 +260,15 @@  struct vm_event_write_ctrlreg {
     uint64_t old_value;
 };
 
+struct vm_event_singlestep {
+    uint64_t gfn;
+};
+
 struct vm_event_debug {
     uint64_t gfn;
+    uint8_t type;        /* HVMOP_TRAP_* */
+    uint8_t insn_length;
+    uint8_t _pad[6];
 };
 
 struct vm_event_mov_to_msr {
@@ -303,7 +312,8 @@  typedef struct vm_event_st {
         struct vm_event_write_ctrlreg         write_ctrlreg;
         struct vm_event_mov_to_msr            mov_to_msr;
         struct vm_event_debug                 software_breakpoint;
-        struct vm_event_debug                 singlestep;
+        struct vm_event_singlestep            singlestep;
+        struct vm_event_debug                 debug_exception;
     } u;
 
     union {