diff mbox

[v2,2/2] x86/traps: Dump instruction stream in show_execution_state()

Message ID 1455187972-30566-2-git-send-email-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andrew Cooper Feb. 11, 2016, 10:52 a.m. UTC
For first pass triage of crashes, it is useful to have the instruction
stream present, especially now that Xen binary patches itself.

A sample output now looks like:

(XEN) ----[ Xen-4.7-unstable  x86_64  debug=y  Not tainted ]----
(XEN) CPU:    0
(XEN) RIP:    e008:[<ffff82d0801607e4>] default_idle+0x76/0x7b
(XEN) RFLAGS: 0000000000000246   CONTEXT: hypervisor
(XEN) rax: ffff82d080331030   rbx: ffff83007fce8000   rcx: 0000000000000000
(XEN) rdx: 0000000000000000   rsi: ffff82d080331b98   rdi: 0000000000000000
(XEN) rbp: ffff83007fcefef0   rsp: ffff83007fcefef0   r8:  ffff83007faf8118
(XEN) r9:  00000009983e89fd   r10: 00000009983e89fd   r11: 0000000000000246
(XEN) r12: ffff83007fd61000   r13: 00000000ffffffff   r14: ffff83007fad9000
(XEN) r15: ffff83007fae3000   cr0: 000000008005003b   cr4: 00000000000026e0
(XEN) cr3: 000000007fc9b000   cr2: 00007f70976b3fed
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen code around <ffff82d0801607e4> (default_idle+0x76/0x7b):
(XEN)  83 3c 10 00 75 04 fb f4 <eb> 01 fb 5d c3 55 48 89 e5 3b 3d 0d 50 12 00 72
(XEN) Xen stack trace from rsp=ffff83007fcefef0:
(XEN)    ffff83007fceff10 ffff82d080160e08 ffff82d08012c40a ffff83007faf9000
(XEN)    ffff83007fcefdd8 ffffffff81a01fd8 ffff88002f07d4c0 ffffffff81a01fd8
(XEN)    0000000000000000 ffffffff81a01e58 ffffffff81a01fd8 0000000000000246
(XEN)    00000000ffff0052 0000000000000000 0000000000000000 0000000000000000
(XEN)    ffffffff810013aa 0000000000000001 00000000deadbeef 00000000deadbeef
(XEN)    0000010000000000 ffffffff810013aa 000000000000e033 0000000000000246
(XEN)    ffffffff81a01e40 000000000000e02b 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 ffff83007faf9000
(XEN)    0000000000000000 0000000000000000
(XEN) Xen call trace:
(XEN)    [<ffff82d0801607e4>] default_idle+0x76/0x7b
(XEN)    [<ffff82d080160e08>] idle_loop+0x51/0x6e
(XEN)

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>

v2: Deal with %rip wrapping.  In such a case, insert dashes into printed line.
---
 xen/arch/x86/traps.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

Comments

Jan Beulich Feb. 11, 2016, 11:16 a.m. UTC | #1
>>> On 11.02.16 at 11:52, <andrew.cooper3@citrix.com> wrote:
> v2: Deal with %rip wrapping.  In such a case, insert dashes into printed 
> line.

I don't think this deals with wrapping now, since ...

> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -114,6 +114,56 @@ boolean_param("ler", opt_ler);
>  #define stack_words_per_line 4
>  #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
>  
> +static void show_code(const struct cpu_user_regs *regs)
> +{
> +    unsigned char insns_before[8], insns_after[16];
> +    unsigned int i, missing_before, missing_after;
> +
> +    if ( guest_mode(regs) )
> +        return;
> +
> +    /*
> +     * This dance with {insns,missing}_{before,after} is to ensure that, if
> +     * %rip -8/+16 wraps around a bounday, we read a non-wrapped regs->rip
> +     * pointer, and calculate which bytes were not read so they may be
> +     * replaced with dashes in the printed output.
> +     */
> +    missing_before = __copy_from_user(
> +        insns_before, (void __user *)regs->rip - 8, ARRAY_SIZE(insns_before));
> +    missing_after = __copy_from_user(
> +        insns_after, (void __user *)regs->rip, ARRAY_SIZE(insns_after));

... iirc __copy_from_user() doesn't range check the addresses.
Also reading the leading bytes is done in kind of a strange way: It'll
read initial bytes (farther away from RIP) and perhaps not read
later ones (closer to RIP), albeit clearly the ones closer are of more
interest. In the extreme case, where RIP is only a few bytes into a
page following an unmapped one, no leading bytes would be printed
at all despite some actually being readable.

Avoiding actual wrapping could be easily done by extending the
guest_mode() check above to also range check RIP against the
hypervisor image boundaries (post-boot this could even be limited
further, but perhaps using the full XEN_VIRT_{START,END} range is
the better route with xSplice in mind.

And then there's a literal 8 left in the first function invocation
above, which imo would better also be ARRAY_SIZE(insns_before)
(albeit with the comment above this invocation is going to change
anyway).

Jan
Andrew Cooper Feb. 11, 2016, 12:12 p.m. UTC | #2
On 11/02/16 11:16, Jan Beulich wrote:
>>>> On 11.02.16 at 11:52, <andrew.cooper3@citrix.com> wrote:
>> v2: Deal with %rip wrapping.  In such a case, insert dashes into printed 
>> line.
> I don't think this deals with wrapping now, since ...
>
>> --- a/xen/arch/x86/traps.c
>> +++ b/xen/arch/x86/traps.c
>> @@ -114,6 +114,56 @@ boolean_param("ler", opt_ler);
>>  #define stack_words_per_line 4
>>  #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
>>  
>> +static void show_code(const struct cpu_user_regs *regs)
>> +{
>> +    unsigned char insns_before[8], insns_after[16];
>> +    unsigned int i, missing_before, missing_after;
>> +
>> +    if ( guest_mode(regs) )
>> +        return;
>> +
>> +    /*
>> +     * This dance with {insns,missing}_{before,after} is to ensure that, if
>> +     * %rip -8/+16 wraps around a bounday, we read a non-wrapped regs->rip
>> +     * pointer, and calculate which bytes were not read so they may be
>> +     * replaced with dashes in the printed output.
>> +     */
>> +    missing_before = __copy_from_user(
>> +        insns_before, (void __user *)regs->rip - 8, ARRAY_SIZE(insns_before));
>> +    missing_after = __copy_from_user(
>> +        insns_after, (void __user *)regs->rip, ARRAY_SIZE(insns_after));
> ... iirc __copy_from_user() doesn't range check the addresses.
> Also reading the leading bytes is done in kind of a strange way: It'll
> read initial bytes (farther away from RIP) and perhaps not read
> later ones (closer to RIP), albeit clearly the ones closer are of more
> interest. In the extreme case, where RIP is only a few bytes into a
> page following an unmapped one, no leading bytes would be printed
> at all despite some actually being readable.

I think in this specific case, it might be best to hand roll some asm
using rep movs and the direction flag, with manual fault handling.

>
> Avoiding actual wrapping could be easily done by extending the
> guest_mode() check above to also range check RIP against the
> hypervisor image boundaries (post-boot this could even be limited
> further, but perhaps using the full XEN_VIRT_{START,END} range is
> the better route with xSplice in mind.

I would like, where possible, to avoid omitting the instruction stream
if Xen is outside of its expected boundaries.

~Andrew
Jan Beulich Feb. 11, 2016, 12:52 p.m. UTC | #3
>>> On 11.02.16 at 13:12, <andrew.cooper3@citrix.com> wrote:
> On 11/02/16 11:16, Jan Beulich wrote:
>>>>> On 11.02.16 at 11:52, <andrew.cooper3@citrix.com> wrote:
>>> --- a/xen/arch/x86/traps.c
>>> +++ b/xen/arch/x86/traps.c
>>> @@ -114,6 +114,56 @@ boolean_param("ler", opt_ler);
>>>  #define stack_words_per_line 4
>>>  #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
>>>  
>>> +static void show_code(const struct cpu_user_regs *regs)
>>> +{
>>> +    unsigned char insns_before[8], insns_after[16];
>>> +    unsigned int i, missing_before, missing_after;
>>> +
>>> +    if ( guest_mode(regs) )
>>> +        return;
>>> +
>>> +    /*
>>> +     * This dance with {insns,missing}_{before,after} is to ensure that, if
>>> +     * %rip -8/+16 wraps around a bounday, we read a non-wrapped regs->rip
>>> +     * pointer, and calculate which bytes were not read so they may be
>>> +     * replaced with dashes in the printed output.
>>> +     */
>>> +    missing_before = __copy_from_user(
>>> +        insns_before, (void __user *)regs->rip - 8, ARRAY_SIZE(insns_before));
>>> +    missing_after = __copy_from_user(
>>> +        insns_after, (void __user *)regs->rip, ARRAY_SIZE(insns_after));
>> ... iirc __copy_from_user() doesn't range check the addresses.
>> Also reading the leading bytes is done in kind of a strange way: It'll
>> read initial bytes (farther away from RIP) and perhaps not read
>> later ones (closer to RIP), albeit clearly the ones closer are of more
>> interest. In the extreme case, where RIP is only a few bytes into a
>> page following an unmapped one, no leading bytes would be printed
>> at all despite some actually being readable.
> 
> I think in this specific case, it might be best to hand roll some asm
> using rep movs and the direction flag, with manual fault handling.

That's certainly an option.

>> Avoiding actual wrapping could be easily done by extending the
>> guest_mode() check above to also range check RIP against the
>> hypervisor image boundaries (post-boot this could even be limited
>> further, but perhaps using the full XEN_VIRT_{START,END} range is
>> the better route with xSplice in mind.
> 
> I would like, where possible, to avoid omitting the instruction stream
> if Xen is outside of its expected boundaries.

Which is because of ...? What useful information do you think can
be gained from the actual instruction when the mere fact of being
outside the boundaries is a bug?

Jan
Andrew Cooper Feb. 11, 2016, 1:41 p.m. UTC | #4
On 11/02/16 12:52, Jan Beulich wrote:
>>>> On 11.02.16 at 13:12, <andrew.cooper3@citrix.com> wrote:
>> On 11/02/16 11:16, Jan Beulich wrote:
>>>>>> On 11.02.16 at 11:52, <andrew.cooper3@citrix.com> wrote:
>>>> --- a/xen/arch/x86/traps.c
>>>> +++ b/xen/arch/x86/traps.c
>>>> @@ -114,6 +114,56 @@ boolean_param("ler", opt_ler);
>>>>  #define stack_words_per_line 4
>>>>  #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
>>>>  
>>>> +static void show_code(const struct cpu_user_regs *regs)
>>>> +{
>>>> +    unsigned char insns_before[8], insns_after[16];
>>>> +    unsigned int i, missing_before, missing_after;
>>>> +
>>>> +    if ( guest_mode(regs) )
>>>> +        return;
>>>> +
>>>> +    /*
>>>> +     * This dance with {insns,missing}_{before,after} is to ensure that, if
>>>> +     * %rip -8/+16 wraps around a bounday, we read a non-wrapped regs->rip
>>>> +     * pointer, and calculate which bytes were not read so they may be
>>>> +     * replaced with dashes in the printed output.
>>>> +     */
>>>> +    missing_before = __copy_from_user(
>>>> +        insns_before, (void __user *)regs->rip - 8, ARRAY_SIZE(insns_before));
>>>> +    missing_after = __copy_from_user(
>>>> +        insns_after, (void __user *)regs->rip, ARRAY_SIZE(insns_after));
>>> ... iirc __copy_from_user() doesn't range check the addresses.
>>> Also reading the leading bytes is done in kind of a strange way: It'll
>>> read initial bytes (farther away from RIP) and perhaps not read
>>> later ones (closer to RIP), albeit clearly the ones closer are of more
>>> interest. In the extreme case, where RIP is only a few bytes into a
>>> page following an unmapped one, no leading bytes would be printed
>>> at all despite some actually being readable.
>> I think in this specific case, it might be best to hand roll some asm
>> using rep movs and the direction flag, with manual fault handling.
> That's certainly an option.

It is turning out to be much nicer.

>
>>> Avoiding actual wrapping could be easily done by extending the
>>> guest_mode() check above to also range check RIP against the
>>> hypervisor image boundaries (post-boot this could even be limited
>>> further, but perhaps using the full XEN_VIRT_{START,END} range is
>>> the better route with xSplice in mind.
>> I would like, where possible, to avoid omitting the instruction stream
>> if Xen is outside of its expected boundaries.
> Which is because of ...? What useful information do you think can
> be gained from the actual instruction when the mere fact of being
> outside the boundaries is a bug?

Wherever %rip is pointing, the code under %rip is directly relevant to
the exact values of the registers and stack dump printed.

It will be obvious from the numeric value of %rip whether it is bad
(also, whether symbol information is found), and making it work for all
cases is easier than restricting to the Xen-only case.

~Andrew
Jan Beulich Feb. 11, 2016, 1:55 p.m. UTC | #5
>>> On 11.02.16 at 14:41, <andrew.cooper3@citrix.com> wrote:
> Wherever %rip is pointing, the code under %rip is directly relevant to
> the exact values of the registers and stack dump printed.
> 
> It will be obvious from the numeric value of %rip whether it is bad
> (also, whether symbol information is found), and making it work for all
> cases is easier than restricting to the Xen-only case.

Let's see (at the example of v3) whether this "easier" actually holds...

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index ab7deee..5146e69 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -114,6 +114,56 @@  boolean_param("ler", opt_ler);
 #define stack_words_per_line 4
 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
 
+static void show_code(const struct cpu_user_regs *regs)
+{
+    unsigned char insns_before[8], insns_after[16];
+    unsigned int i, missing_before, missing_after;
+
+    if ( guest_mode(regs) )
+        return;
+
+    /*
+     * This dance with {insns,missing}_{before,after} is to ensure that, if
+     * %rip -8/+16 wraps around a bounday, we read a non-wrapped regs->rip
+     * pointer, and calculate which bytes were not read so they may be
+     * replaced with dashes in the printed output.
+     */
+    missing_before = __copy_from_user(
+        insns_before, (void __user *)regs->rip - 8, ARRAY_SIZE(insns_before));
+    missing_after = __copy_from_user(
+        insns_after, (void __user *)regs->rip, ARRAY_SIZE(insns_after));
+
+    printk("Xen code around <%p> (%ps)%s:\n",
+           _p(regs->rip), _p(regs->rip),
+           (missing_before || missing_after) ? " [fault on access]" : "");
+
+    /* Print bytes from insns_before[]. */
+    for ( i = 0; i < ARRAY_SIZE(insns_before); ++i )
+    {
+        if ( i < (ARRAY_SIZE(insns_before) - missing_before) )
+            printk(" %02x", insns_before[i]);
+        else
+            printk(" --");
+    }
+
+    /* Print the byte under %rip. */
+    if ( missing_after != ARRAY_SIZE(insns_after) )
+        printk(" <%02x>", insns_after[0]);
+    else
+        printk(" <-->");
+
+    /* Print bytes from insns_after[]. */
+    for ( i = 1; i < ARRAY_SIZE(insns_after); ++i )
+    {
+        if ( i < (ARRAY_SIZE(insns_after) - missing_after) )
+            printk(" %02x", insns_after[i]);
+        else
+            printk(" --");
+    }
+
+    printk("\n");
+}
+
 static void show_guest_stack(struct vcpu *v, const struct cpu_user_regs *regs)
 {
     int i;
@@ -420,6 +470,7 @@  void show_execution_state(const struct cpu_user_regs *regs)
     unsigned long flags = console_lock_recursive_irqsave();
 
     show_registers(regs);
+    show_code(regs);
     show_stack(regs);
 
     console_unlock_recursive_irqrestore(flags);