diff mbox

[03/15] xen/tools: tracing: several improvements on IRQs tracing

Message ID 149633843555.12814.16573412358602741145.stgit@Solace.fritz.box (mailing list archive)
State New, archived
Headers show

Commit Message

Dario Faggioli June 1, 2017, 5:33 p.m. UTC
More specifically:
 - the handling of the TRC_HW_IRQ_HANDLED is fixed, both in
   xentrace_format and in xenalyze;
 - simple events for recording when we enter and exit the
   do_IRQ function, as well as when we deal with a guest
   IRQ, are added;
 - tracing of IRQs handled with direct vectors is also
   added.

With all the above, a trace will now look like this (using
xenalyze):

 0.001299072 .x- d32768v5 irq_enter, irq 80000000
 0.001299072 .x- d32768v5 irq_direct, vec fa, handler = 0xffff82d08026d7e4
 0.001300014 .x- d32768v5 raise_softirq nr 0
 0.001300487 .x- d32768v5 irq_exit irq 80000000, in_irq = 0

Or like this:

 0.049437892 -|- d32767v12 irq_enter, irq 4
 0.049437892 -|- d32767v12 irq_handled irq 4, 85428 cycles
 0.049474336 -|- d32767v12 irq_exit irq 4, status = 0x0, in_irq = 0

Making it much easier to figure out when interrupt
processing start, what it does and when it ends.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
 tools/xentrace/formats     |    6 ++++
 tools/xentrace/xenalyze.c  |   56 +++++++++++++++++++++++++++++++++++-----
 xen/arch/x86/irq.c         |   61 +++++++++++++++++++++++++++++++++++++-------
 xen/include/public/trace.h |    4 +++
 4 files changed, 109 insertions(+), 18 deletions(-)

Comments

Andrew Cooper June 1, 2017, 6:02 p.m. UTC | #1
On 01/06/17 18:33, Dario Faggioli wrote:
> More specifically:
>  - the handling of the TRC_HW_IRQ_HANDLED is fixed, both in
>    xentrace_format and in xenalyze;
>  - simple events for recording when we enter and exit the
>    do_IRQ function, as well as when we deal with a guest
>    IRQ, are added;
>  - tracing of IRQs handled with direct vectors is also
>    added.
>
> With all the above, a trace will now look like this (using
> xenalyze):
>
>  0.001299072 .x- d32768v5 irq_enter, irq 80000000
>  0.001299072 .x- d32768v5 irq_direct, vec fa, handler = 0xffff82d08026d7e4

Please consistently use 0x$hex.  vec in particular has 10/16ths of its
values which are completely ambiguous between hex and decimal.

~Andrew

>  0.001300014 .x- d32768v5 raise_softirq nr 0
>  0.001300487 .x- d32768v5 irq_exit irq 80000000, in_irq = 0
>
> Or like this:
>
>  0.049437892 -|- d32767v12 irq_enter, irq 4
>  0.049437892 -|- d32767v12 irq_handled irq 4, 85428 cycles
>  0.049474336 -|- d32767v12 irq_exit irq 4, status = 0x0, in_irq = 0
>
> Making it much easier to figure out when interrupt
> processing start, what it does and when it ends.
>
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> ---
> Cc: George Dunlap <george.dunlap@eu.citrix.com>
> Cc: Ian Jackson <ian.jackson@eu.citrix.com>
> Cc: Wei Liu <wei.liu2@citrix.com>
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
>  tools/xentrace/formats     |    6 ++++
>  tools/xentrace/xenalyze.c  |   56 +++++++++++++++++++++++++++++++++++-----
>  xen/arch/x86/irq.c         |   61 +++++++++++++++++++++++++++++++++++++-------
>  xen/include/public/trace.h |    4 +++
>  4 files changed, 109 insertions(+), 18 deletions(-)
>
> diff --git a/tools/xentrace/formats b/tools/xentrace/formats
> index 8b31780..00c29b8 100644
> --- a/tools/xentrace/formats
> +++ b/tools/xentrace/formats
> @@ -197,7 +197,11 @@
>  0x00802005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  move_vector [ irq = %(1)d had vector 0x%(2)x on CPU%(3)d ]
>  0x00802006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  assign_vector [ irq = %(1)d = vector 0x%(2)x, CPU mask: 0x%(3)08x ]
>  0x00802007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  bogus_vector [ 0x%(1)x ]
> -0x00802008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_irq [ irq = %(1)d, began = %(2)dus, ended = %(3)dus ]
> +0x00802008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  irq_handled [ irq = %(1)d, dur = 0x%(3)08x%(2)08x ]
> +0x00802009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  direct_vector [ vector 0x%(1)x, handler = 0x%(3)08x%(2)08x ]
> +0x0080200a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_irq [ irq = %(1)d ]
> +0x0080200b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_guest_irq [ irq = %(1)d ]
> +0x0080200c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  irq_exit [ irq = %(1)d, status = 0x%(2)x, in_irq = %(3)d ]
>  
>  0x00084001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  hpet create [ tn = %(1)d, irq = %(2)d, delta = 0x%(4)08x%(3)08x, period = 0x%(6)08x%(5)08x ]
>  0x00084002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  pit create [ delta = 0x%(1)016x, period = 0x%(2)016x ]
> diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
> index fa608ad..063eee7 100644
> --- a/tools/xentrace/xenalyze.c
> +++ b/tools/xentrace/xenalyze.c
> @@ -8380,19 +8380,45 @@ void irq_process(struct pcpu_info *p) {
>          }
>          break;
>      }
> -    case TRC_HW_IRQ_HANDLED:
> +    case TRC_HW_IRQ_ENTER:
> +    case TRC_HW_IRQ_GUEST:
>      {
>          struct {
> -            int irq, start_tsc, end_tsc;
> +            int32_t irq;
>          } *r = (typeof(r))ri->d;
> -        int arctime;
>  
> -        arctime = r->end_tsc - r->start_tsc;
>          if ( opt.dump_all )
>          {
> -            printf(" %s irq_handled irq %x %d (%d,%d)\n",
> -                   ri->dump_header,
> -                   r->irq, arctime, r->start_tsc, r->end_tsc);
> +            printf(" %s irq_%s, irq %x\n", ri->dump_header,
> +                   ri->event == TRC_HW_IRQ_ENTER ? "enter" : "guest", r->irq);

irq %#x

> +        }
> +        break;
> +    }
> +    case TRC_HW_IRQ_DIRECT_VECTOR:
> +    {
> +        struct {
> +            uint32_t vec;
> +            uint64_t handler;
> +        } __attribute__((packed)) *r = (typeof(r))ri->d;
> +
> +        if ( opt.dump_all )
> +        {
> +            printf(" %s irq_direct, vec %x, handler = %p\n",
> +                   ri->dump_header, r->vec, (void*)r->handler);

vec %02x

> +        }
> +        break;
> +    }
> +    case TRC_HW_IRQ_HANDLED:
> +    {
> +        struct {
> +            int32_t irq;
> +            uint64_t arctime;
> +        } __attribute__((packed)) *r = (typeof(r))ri->d;
> +
> +        if ( opt.dump_all )
> +        {
> +            printf(" %s irq_handled irq %x, %"PRIu64" cycles\n",

irq %#x

> +                   ri->dump_header, r->irq, r->arctime);
>          }
>          if ( opt.scatterplot_irq )
>          {
> @@ -8407,6 +8433,22 @@ void irq_process(struct pcpu_info *p) {
>          }
>          break;
>      }
> +    case TRC_HW_IRQ_EXIT:
> +    {
> +        struct {
> +            int32_t irq, status;
> +            uint32_t in_irq;
> +        } *r = (typeof(r))ri->d;
> +
> +        if ( opt.dump_all )
> +        {
> +            printf(" %s irq_exit irq %x", ri->dump_header, r->irq);
> +            if ( r->status != -1 )
> +                printf(", status = 0x%x", r->status);
> +            printf(", in_irq = %d\n", r->in_irq);
> +        }
> +        break;
> +    }
>      case TRC_HW_IRQ_ASSIGN_VECTOR:
>      {
>          struct {
> diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
> index 676ba52..f5d9302 100644
> --- a/xen/arch/x86/irq.c
> +++ b/xen/arch/x86/irq.c
> @@ -100,11 +100,15 @@ static void trace_irq_mask(u32 event, int irq, int vector, cpumask_t *mask)
>          unsigned int irq:16, vec:16;
>          unsigned int mask[6];
>      } d;
> +
> +    if ( likely(!tb_init_done) )
> +        return;
> +
>      d.irq = irq;
>      d.vec = vector;
>      memset(d.mask, 0, sizeof(d.mask));
>      memcpy(d.mask, mask, min(sizeof(d.mask), sizeof(cpumask_t)));
> -    trace_var(event, 1, sizeof(d), &d);
> +    __trace_var(event, 1, sizeof(d), &d);
>  }
>  
>  static int __init __bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
> @@ -804,23 +808,54 @@ void alloc_direct_apic_vector(
>      spin_unlock(&lock);
>  }
>  
> +static inline void trace_irq_handled(int irq, uint64_t time)
> +{
> +    struct __packed {
> +        int32_t irq;
> +        uint64_t time;
> +    } d;
> +
> +    if ( likely(!tb_init_done) )
> +        return;
> +
> +    d.irq = irq;
> +    d.time = time;
> +    __trace_var(TRC_HW_IRQ_HANDLED, 0, sizeof(d), &d);
> +}
> +
>  void do_IRQ(struct cpu_user_regs *regs)
>  {
>      struct irqaction *action;
> -    uint32_t          tsc_in;
> -    struct irq_desc  *desc;
> +    uint64_t          tsc_in = 0;
> +    struct irq_desc  *desc = NULL;
>      unsigned int      vector = (u8)regs->entry_vector;
>      int irq = __get_cpu_var(vector_irq[vector]);
>      struct cpu_user_regs *old_regs = set_irq_regs(regs);
>      
>      perfc_incr(irqs);
>      this_cpu(irq_count)++;
> +    TRACE_1D(TRC_HW_IRQ_ENTER, irq);
>      irq_enter();
>  
> -    if (irq < 0) {
> -        if (direct_apic_vector[vector] != NULL) {
> +    if (irq < 0)

Spaces.

> +    {
> +        if (direct_apic_vector[vector] != NULL)

Spaces.

> +        {
> +            if ( unlikely(tb_init_done) )
> +            {
> +                struct __packed {
> +                    uint32_t vec;
> +                    uint64_t handler;
> +                } d;
> +
> +                d.vec = vector;
> +                d.handler = (uint64_t)direct_apic_vector[vector];

As there is no code inbetween, you can do this as a straight
initialisation of d.

i.e.

} d = { vector, (uint64_t)direct_apic_vector[vector] };

Otherwise, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> +                __trace_var(TRC_HW_IRQ_DIRECT_VECTOR, 0, sizeof(d), &d);
> +            }
>              (*direct_apic_vector[vector])(regs);
> -        } else {
> +        }
> +        else
> +        {
>              const char *kind = ", LAPIC";
>  
>              if ( apic_isr_read(vector) )
> @@ -884,9 +919,13 @@ void do_IRQ(struct cpu_user_regs *regs)
>              desc->rl_quantum_start = now;
>          }
>  
> -        tsc_in = tb_init_done ? get_cycles() : 0;
> +        if ( unlikely(tb_init_done) )
> +        {
> +            __trace_var(TRC_HW_IRQ_GUEST, 0, sizeof(irq), &irq);
> +            tsc_in = get_cycles();
> +        }
>          __do_IRQ_guest(irq);
> -        TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
> +        trace_irq_handled(irq, get_cycles() - tsc_in);
>          goto out_no_end;
>      }
>  
> @@ -907,9 +946,10 @@ void do_IRQ(struct cpu_user_regs *regs)
>      {
>          desc->status &= ~IRQ_PENDING;
>          spin_unlock_irq(&desc->lock);
> -        tsc_in = tb_init_done ? get_cycles() : 0;
> +        if ( unlikely(tb_init_done) )
> +            tsc_in = get_cycles();
>          action->handler(irq, action->dev_id, regs);
> -        TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
> +        trace_irq_handled(irq, get_cycles() - tsc_in);
>          spin_lock_irq(&desc->lock);
>      }
>  
> @@ -922,6 +962,7 @@ void do_IRQ(struct cpu_user_regs *regs)
>      spin_unlock(&desc->lock);
>   out_no_unlock:
>      irq_exit();
> +    TRACE_3D(TRC_HW_IRQ_EXIT, irq, desc == NULL ? -1 : desc->status, in_irq());
>      set_irq_regs(old_regs);
>  }
>  
> diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
> index 7f2e891..f66a7af 100644
> --- a/xen/include/public/trace.h
> +++ b/xen/include/public/trace.h
> @@ -271,6 +271,10 @@
>  #define TRC_HW_IRQ_ASSIGN_VECTOR      (TRC_HW_IRQ + 0x6)
>  #define TRC_HW_IRQ_UNMAPPED_VECTOR    (TRC_HW_IRQ + 0x7)
>  #define TRC_HW_IRQ_HANDLED            (TRC_HW_IRQ + 0x8)
> +#define TRC_HW_IRQ_DIRECT_VECTOR      (TRC_HW_IRQ + 0x9)
> +#define TRC_HW_IRQ_ENTER              (TRC_HW_IRQ + 0xA)
> +#define TRC_HW_IRQ_GUEST              (TRC_HW_IRQ + 0xB)
> +#define TRC_HW_IRQ_EXIT               (TRC_HW_IRQ + 0xC)
>  
>  /*
>   * Event Flags
>
Dario Faggioli June 1, 2017, 11:12 p.m. UTC | #2
On Thu, 2017-06-01 at 19:02 +0100, Andrew Cooper wrote:
> On 01/06/17 18:33, Dario Faggioli wrote:
> > More specifically:
> >  - the handling of the TRC_HW_IRQ_HANDLED is fixed, both in
> >    xentrace_format and in xenalyze;
> >  - simple events for recording when we enter and exit the
> >    do_IRQ function, as well as when we deal with a guest
> >    IRQ, are added;
> >  - tracing of IRQs handled with direct vectors is also
> >    added.
> > 
> > With all the above, a trace will now look like this (using
> > xenalyze):
> > 
> >  0.001299072 .x- d32768v5 irq_enter, irq 80000000
> >  0.001299072 .x- d32768v5 irq_direct, vec fa, handler =
> > 0xffff82d08026d7e4
> 
> Please consistently use 0x$hex.  vec in particular has 10/16ths of
> its
> values which are completely ambiguous between hex and decimal.
> 
I fully agree. The file is not super consistent about this already, and
I was basically following the suit of the majority of existing cases.

But I really do agree, so I'll probably make a pre-patch, that converts
everyone to 0x$hex, and then use it in this patch as well.

> > +        {
> > +            if ( unlikely(tb_init_done) )
> > +            {
> > +                struct __packed {
> > +                    uint32_t vec;
> > +                    uint64_t handler;
> > +                } d;
> > +
> > +                d.vec = vector;
> > +                d.handler = (uint64_t)direct_apic_vector[vector];
> 
> As there is no code inbetween, you can do this as a straight
> initialisation of d.
> 
> i.e.
> 
> } d = { vector, (uint64_t)direct_apic_vector[vector] };
> 
> Otherwise, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> 
Ok, thanks. I'll take care of this, and of all the others print format
and style comments.

Regards,
Dario
Jan Beulich June 7, 2017, 3:05 p.m. UTC | #3
>>> On 01.06.17 at 19:33, <dario.faggioli@citrix.com> wrote:
> More specifically:
>  - the handling of the TRC_HW_IRQ_HANDLED is fixed, both in
>    xentrace_format and in xenalyze;
>  - simple events for recording when we enter and exit the
>    do_IRQ function, as well as when we deal with a guest
>    IRQ, are added;

On x86. What about ARM?

>  - tracing of IRQs handled with direct vectors is also
>    added.
> 
> With all the above, a trace will now look like this (using
> xenalyze):
> 
>  0.001299072 .x- d32768v5 irq_enter, irq 80000000
>  0.001299072 .x- d32768v5 irq_direct, vec fa, handler = 0xffff82d08026d7e4
>  0.001300014 .x- d32768v5 raise_softirq nr 0
>  0.001300487 .x- d32768v5 irq_exit irq 80000000, in_irq = 0

The IRQ number looks bogus here, and I'm not convinced giving
a bogus example in a commit message is a good idea. I realize
this is presumably a result of vector_irq[] being initialized to
INT_MIN, but I would say the trace points would then better be
moved so that no bogus data is being recorded.

> @@ -884,9 +919,13 @@ void do_IRQ(struct cpu_user_regs *regs)
>              desc->rl_quantum_start = now;
>          }
>  
> -        tsc_in = tb_init_done ? get_cycles() : 0;
> +        if ( unlikely(tb_init_done) )
> +        {
> +            __trace_var(TRC_HW_IRQ_GUEST, 0, sizeof(irq), &irq);
> +            tsc_in = get_cycles();
> +        }
>          __do_IRQ_guest(irq);
> -        TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
> +        trace_irq_handled(irq, get_cycles() - tsc_in);
>          goto out_no_end;
>      }

Before this change, the get_cycles() invocation was after
the tb_init_done check. Now you move it ahead of it (as
the function arguments are evaluated before executing the
function body) - are you sure all compiler versions will suitably
re-order this?

Additionally I'm afraid this will trigger compiler warnings on at
least some compiler versions, as tsc_in is now possibly
uninitialized (and there's no clear way to disprove this for the
compiler, again because function arguments are being
evaluated before the function body is reached).

As to get_cycles() itself - is the relatively imprecise time
stamp it produces really good enough for tracing? I notice
that there are only very few other users of that function.

> @@ -922,6 +962,7 @@ void do_IRQ(struct cpu_user_regs *regs)
>      spin_unlock(&desc->lock);
>   out_no_unlock:
>      irq_exit();
> +    TRACE_3D(TRC_HW_IRQ_EXIT, irq, desc == NULL ? -1 : desc->status, in_irq());

The ordering of irq_{enter,exit}() and TRACE_{1,3}D() may be
preferable from a trace quality pov, but as far as the system is
concerned you're adding code which runs in interrupt context
without being aware of that. This may be a latent issue.

Jan
Dario Faggioli June 7, 2017, 3:45 p.m. UTC | #4
On Wed, 2017-06-07 at 09:05 -0600, Jan Beulich wrote:
> > > > On 01.06.17 at 19:33, <dario.faggioli@citrix.com> wrote:
> > 
> > More specifically:
> >  - the handling of the TRC_HW_IRQ_HANDLED is fixed, both in
> >    xentrace_format and in xenalyze;
> >  - simple events for recording when we enter and exit the
> >    do_IRQ function, as well as when we deal with a guest
> >    IRQ, are added;
> 
> On x86. What about ARM?
> 
We don't have tracing at all on ARM. I guess I can mention this in the
commit message (and, perhaps, even add an 'x86' tag to the subject).

> >  - tracing of IRQs handled with direct vectors is also
> >    added.
> > 
> > With all the above, a trace will now look like this (using
> > xenalyze):
> > 
> >  0.001299072 .x- d32768v5 irq_enter, irq 80000000
> >  0.001299072 .x- d32768v5 irq_direct, vec fa, handler =
> > 0xffff82d08026d7e4
> >  0.001300014 .x- d32768v5 raise_softirq nr 0
> >  0.001300487 .x- d32768v5 irq_exit irq 80000000, in_irq = 0
> 
> The IRQ number looks bogus here, and I'm not convinced giving
> a bogus example in a commit message is a good idea. I realize
> this is presumably a result of vector_irq[] being initialized to
> INT_MIN, but I would say the trace points would then better be
> moved so that no bogus data is being recorded.
> 
Ok, I'll have a look at how to achieve that.

> > @@ -884,9 +919,13 @@ void do_IRQ(struct cpu_user_regs *regs)
> >              desc->rl_quantum_start = now;
> >          }
> >  
> > -        tsc_in = tb_init_done ? get_cycles() : 0;
> > +        if ( unlikely(tb_init_done) )
> > +        {
> > +            __trace_var(TRC_HW_IRQ_GUEST, 0, sizeof(irq), &irq);
> > +            tsc_in = get_cycles();
> > +        }
> >          __do_IRQ_guest(irq);
> > -        TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
> > +        trace_irq_handled(irq, get_cycles() - tsc_in);
> >          goto out_no_end;
> >      }
> 
> Before this change, the get_cycles() invocation was after
> the tb_init_done check. Now you move it ahead of it (as
> the function arguments are evaluated before executing the
> function body) - are you sure all compiler versions will suitably
> re-order this?
> 
> Additionally I'm afraid this will trigger compiler warnings on at
> least some compiler versions, as tsc_in is now possibly
> uninitialized (and there's no clear way to disprove this for the
> compiler, again because function arguments are being
> evaluated before the function body is reached).
> 
I understand and (now that I see it) agree with your remark on when
get_cycles() is called. I'll reorganize things so that the patched
behavior matches the existing one.

OTOH, I'm not sure I see the "potentially uninitialized" issue for
tsc_in, but since I'm reworking the code, I'll keep that in mind too.

> As to get_cycles() itself - is the relatively imprecise time
> stamp it produces really good enough for tracing? I notice
> that there are only very few other users of that function.
> 
Yes, that's also something I was wondering. It's what is being used
currently, so I thought it was like that for a reason, and that it
wasn't this patch job to change that.

But if it's judged to be fine to turn this into NOW() (done either
here, or in a dedicated patch)., I'm all for it.

> > @@ -922,6 +962,7 @@ void do_IRQ(struct cpu_user_regs *regs)
> >      spin_unlock(&desc->lock);
> >   out_no_unlock:
> >      irq_exit();
> > +    TRACE_3D(TRC_HW_IRQ_EXIT, irq, desc == NULL ? -1 : desc-
> > >status, in_irq());
> 
> The ordering of irq_{enter,exit}() and TRACE_{1,3}D() may be
> preferable from a trace quality pov, but as far as the system is
> concerned you're adding code which runs in interrupt context
> without being aware of that. This may be a latent issue.
> 
Sorry, I don't understand your concern(s). What is it that I am not
aware of, and what is it that could be a latent issue?

About the position of those tracepoints: the nice thing about IRQ_EXIT
following irq_exit() is that I can record the result of in_irq(), which
will tell whether or not we're dealing with a nested interrupt. I don't
 do the same for IRQ_ENTER, but, not that I think about it, I guess I
could.

However, if what you're saying is that they need to stay within the
irq_enter()-irq_exit() section, I can certainly make that happen. The
trace needs to be interpreted knowing where the tracepoints are anyway
(and that's true for each and every event already), so no big deal,
really.

Thanks and Regards,
Dario
Jan Beulich June 7, 2017, 3:58 p.m. UTC | #5
>>> On 07.06.17 at 17:45, <dario.faggioli@citrix.com> wrote:
> On Wed, 2017-06-07 at 09:05 -0600, Jan Beulich wrote:
>> > > > On 01.06.17 at 19:33, <dario.faggioli@citrix.com> wrote:
>> > @@ -884,9 +919,13 @@ void do_IRQ(struct cpu_user_regs *regs)
>> >              desc->rl_quantum_start = now;
>> >          }
>> >  
>> > -        tsc_in = tb_init_done ? get_cycles() : 0;
>> > +        if ( unlikely(tb_init_done) )
>> > +        {
>> > +            __trace_var(TRC_HW_IRQ_GUEST, 0, sizeof(irq), &irq);
>> > +            tsc_in = get_cycles();
>> > +        }
>> >          __do_IRQ_guest(irq);
>> > -        TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
>> > +        trace_irq_handled(irq, get_cycles() - tsc_in);
>> >          goto out_no_end;
>> >      }
>> 
>> Before this change, the get_cycles() invocation was after
>> the tb_init_done check. Now you move it ahead of it (as
>> the function arguments are evaluated before executing the
>> function body) - are you sure all compiler versions will suitably
>> re-order this?
>> 
>> Additionally I'm afraid this will trigger compiler warnings on at
>> least some compiler versions, as tsc_in is now possibly
>> uninitialized (and there's no clear way to disprove this for the
>> compiler, again because function arguments are being
>> evaluated before the function body is reached).
>> 
> I understand and (now that I see it) agree with your remark on when
> get_cycles() is called. I'll reorganize things so that the patched
> behavior matches the existing one.
> 
> OTOH, I'm not sure I see the "potentially uninitialized" issue for
> tsc_in, but since I'm reworking the code, I'll keep that in mind too.

You initialize tsc_in only when tb_init_done is set, but you use
it without that conditional. And even if you used it under that
same conditional, older compiler versions aren't able to track
that fact (same conditional) and raise a warning anyway.

>> As to get_cycles() itself - is the relatively imprecise time
>> stamp it produces really good enough for tracing? I notice
>> that there are only very few other users of that function.
>> 
> Yes, that's also something I was wondering. It's what is being used
> currently, so I thought it was like that for a reason, and that it
> wasn't this patch job to change that.
> 
> But if it's judged to be fine to turn this into NOW() (done either
> here, or in a dedicated patch)., I'm all for it.

Right, I didn't necessarily mean for this to be done here, I merely
wanted to raise the fact (remembering that for the other time
handling code using "synced" TSC reads was quite a bit of an
accuracy improvement).

>> > @@ -922,6 +962,7 @@ void do_IRQ(struct cpu_user_regs *regs)
>> >      spin_unlock(&desc->lock);
>> >   out_no_unlock:
>> >      irq_exit();
>> > +    TRACE_3D(TRC_HW_IRQ_EXIT, irq, desc == NULL ? -1 : desc-
>> > >status, in_irq());
>> 
>> The ordering of irq_{enter,exit}() and TRACE_{1,3}D() may be
>> preferable from a trace quality pov, but as far as the system is
>> concerned you're adding code which runs in interrupt context
>> without being aware of that. This may be a latent issue.
>> 
> Sorry, I don't understand your concern(s). What is it that I am not
> aware of, and what is it that could be a latent issue?
> 
> About the position of those tracepoints: the nice thing about IRQ_EXIT
> following irq_exit() is that I can record the result of in_irq(), which
> will tell whether or not we're dealing with a nested interrupt. I don't
>  do the same for IRQ_ENTER, but, not that I think about it, I guess I
> could.
> 
> However, if what you're saying is that they need to stay within the
> irq_enter()-irq_exit() section, I can certainly make that happen. The
> trace needs to be interpreted knowing where the tracepoints are anyway
> (and that's true for each and every event already), so no big deal,
> really.

Yes, it's really just this latter fact - it would feel more safe from
an overall system pov if you had

    irq_enter();
    TRACE_1D();
    ...
    TRACE_3D();
    irq_exit();

Jan
George Dunlap June 8, 2017, 2:53 p.m. UTC | #6
On 07/06/17 16:58, Jan Beulich wrote:
>>>> On 07.06.17 at 17:45, <dario.faggioli@citrix.com> wrote:
>> On Wed, 2017-06-07 at 09:05 -0600, Jan Beulich wrote:
>>>>>> On 01.06.17 at 19:33, <dario.faggioli@citrix.com> wrote:
>>>> @@ -884,9 +919,13 @@ void do_IRQ(struct cpu_user_regs *regs)
>>>>              desc->rl_quantum_start = now;
>>>>          }
>>>>  
>>>> -        tsc_in = tb_init_done ? get_cycles() : 0;
>>>> +        if ( unlikely(tb_init_done) )
>>>> +        {
>>>> +            __trace_var(TRC_HW_IRQ_GUEST, 0, sizeof(irq), &irq);
>>>> +            tsc_in = get_cycles();
>>>> +        }
>>>>          __do_IRQ_guest(irq);
>>>> -        TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
>>>> +        trace_irq_handled(irq, get_cycles() - tsc_in);
>>>>          goto out_no_end;
>>>>      }
>>>
>>> Before this change, the get_cycles() invocation was after
>>> the tb_init_done check. Now you move it ahead of it (as
>>> the function arguments are evaluated before executing the
>>> function body) - are you sure all compiler versions will suitably
>>> re-order this?
>>>
>>> Additionally I'm afraid this will trigger compiler warnings on at
>>> least some compiler versions, as tsc_in is now possibly
>>> uninitialized (and there's no clear way to disprove this for the
>>> compiler, again because function arguments are being
>>> evaluated before the function body is reached).
>>>
>> I understand and (now that I see it) agree with your remark on when
>> get_cycles() is called. I'll reorganize things so that the patched
>> behavior matches the existing one.
>>
>> OTOH, I'm not sure I see the "potentially uninitialized" issue for
>> tsc_in, but since I'm reworking the code, I'll keep that in mind too.
> 
> You initialize tsc_in only when tb_init_done is set, but you use
> it without that conditional. And even if you used it under that
> same conditional, older compiler versions aren't able to track
> that fact (same conditional) and raise a warning anyway.

This patch changes thing to initialize tsc_in on declaration (at the top
of the function).

If we want to keep the compiler "uninitialized variable" analysis
around, that would have to go away and we'd want to do something like
was there before.  (I'm ambivalent about it, but I know Jan likes it.)

 -George
George Dunlap June 8, 2017, 2:59 p.m. UTC | #7
On 01/06/17 18:33, Dario Faggioli wrote:
> More specifically:
>  - the handling of the TRC_HW_IRQ_HANDLED is fixed, both in
>    xentrace_format and in xenalyze;
>  - simple events for recording when we enter and exit the
>    do_IRQ function, as well as when we deal with a guest
>    IRQ, are added;
>  - tracing of IRQs handled with direct vectors is also
>    added.
> 
> With all the above, a trace will now look like this (using
> xenalyze):
> 
>  0.001299072 .x- d32768v5 irq_enter, irq 80000000
>  0.001299072 .x- d32768v5 irq_direct, vec fa, handler = 0xffff82d08026d7e4
>  0.001300014 .x- d32768v5 raise_softirq nr 0
>  0.001300487 .x- d32768v5 irq_exit irq 80000000, in_irq = 0
> 
> Or like this:
> 
>  0.049437892 -|- d32767v12 irq_enter, irq 4
>  0.049437892 -|- d32767v12 irq_handled irq 4, 85428 cycles
>  0.049474336 -|- d32767v12 irq_exit irq 4, status = 0x0, in_irq = 0
> 
> Making it much easier to figure out when interrupt
> processing start, what it does and when it ends.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

Generally looks good to me.  I mostly agree with the commentary as well.

 -George
Jan Beulich June 8, 2017, 3:34 p.m. UTC | #8
>>> On 08.06.17 at 16:53, <george.dunlap@citrix.com> wrote:
> On 07/06/17 16:58, Jan Beulich wrote:
>>>>> On 07.06.17 at 17:45, <dario.faggioli@citrix.com> wrote:
>>> On Wed, 2017-06-07 at 09:05 -0600, Jan Beulich wrote:
>>>>>>> On 01.06.17 at 19:33, <dario.faggioli@citrix.com> wrote:
>>>>> @@ -884,9 +919,13 @@ void do_IRQ(struct cpu_user_regs *regs)
>>>>>              desc->rl_quantum_start = now;
>>>>>          }
>>>>>  
>>>>> -        tsc_in = tb_init_done ? get_cycles() : 0;
>>>>> +        if ( unlikely(tb_init_done) )
>>>>> +        {
>>>>> +            __trace_var(TRC_HW_IRQ_GUEST, 0, sizeof(irq), &irq);
>>>>> +            tsc_in = get_cycles();
>>>>> +        }
>>>>>          __do_IRQ_guest(irq);
>>>>> -        TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
>>>>> +        trace_irq_handled(irq, get_cycles() - tsc_in);
>>>>>          goto out_no_end;
>>>>>      }
>>>>
>>>> Before this change, the get_cycles() invocation was after
>>>> the tb_init_done check. Now you move it ahead of it (as
>>>> the function arguments are evaluated before executing the
>>>> function body) - are you sure all compiler versions will suitably
>>>> re-order this?
>>>>
>>>> Additionally I'm afraid this will trigger compiler warnings on at
>>>> least some compiler versions, as tsc_in is now possibly
>>>> uninitialized (and there's no clear way to disprove this for the
>>>> compiler, again because function arguments are being
>>>> evaluated before the function body is reached).
>>>>
>>> I understand and (now that I see it) agree with your remark on when
>>> get_cycles() is called. I'll reorganize things so that the patched
>>> behavior matches the existing one.
>>>
>>> OTOH, I'm not sure I see the "potentially uninitialized" issue for
>>> tsc_in, but since I'm reworking the code, I'll keep that in mind too.
>> 
>> You initialize tsc_in only when tb_init_done is set, but you use
>> it without that conditional. And even if you used it under that
>> same conditional, older compiler versions aren't able to track
>> that fact (same conditional) and raise a warning anyway.
> 
> This patch changes thing to initialize tsc_in on declaration (at the top
> of the function).

Oh, it's only now that I actually see an initializer is being added.
I'm sorry for the noise then.

> If we want to keep the compiler "uninitialized variable" analysis
> around, that would have to go away and we'd want to do something like
> was there before.  (I'm ambivalent about it, but I know Jan likes it.)

No, I'm fine with the change. I was just being blind.

Jan
diff mbox

Patch

diff --git a/tools/xentrace/formats b/tools/xentrace/formats
index 8b31780..00c29b8 100644
--- a/tools/xentrace/formats
+++ b/tools/xentrace/formats
@@ -197,7 +197,11 @@ 
 0x00802005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  move_vector [ irq = %(1)d had vector 0x%(2)x on CPU%(3)d ]
 0x00802006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  assign_vector [ irq = %(1)d = vector 0x%(2)x, CPU mask: 0x%(3)08x ]
 0x00802007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  bogus_vector [ 0x%(1)x ]
-0x00802008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_irq [ irq = %(1)d, began = %(2)dus, ended = %(3)dus ]
+0x00802008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  irq_handled [ irq = %(1)d, dur = 0x%(3)08x%(2)08x ]
+0x00802009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  direct_vector [ vector 0x%(1)x, handler = 0x%(3)08x%(2)08x ]
+0x0080200a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_irq [ irq = %(1)d ]
+0x0080200b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_guest_irq [ irq = %(1)d ]
+0x0080200c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  irq_exit [ irq = %(1)d, status = 0x%(2)x, in_irq = %(3)d ]
 
 0x00084001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  hpet create [ tn = %(1)d, irq = %(2)d, delta = 0x%(4)08x%(3)08x, period = 0x%(6)08x%(5)08x ]
 0x00084002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  pit create [ delta = 0x%(1)016x, period = 0x%(2)016x ]
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index fa608ad..063eee7 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -8380,19 +8380,45 @@  void irq_process(struct pcpu_info *p) {
         }
         break;
     }
-    case TRC_HW_IRQ_HANDLED:
+    case TRC_HW_IRQ_ENTER:
+    case TRC_HW_IRQ_GUEST:
     {
         struct {
-            int irq, start_tsc, end_tsc;
+            int32_t irq;
         } *r = (typeof(r))ri->d;
-        int arctime;
 
-        arctime = r->end_tsc - r->start_tsc;
         if ( opt.dump_all )
         {
-            printf(" %s irq_handled irq %x %d (%d,%d)\n",
-                   ri->dump_header,
-                   r->irq, arctime, r->start_tsc, r->end_tsc);
+            printf(" %s irq_%s, irq %x\n", ri->dump_header,
+                   ri->event == TRC_HW_IRQ_ENTER ? "enter" : "guest", r->irq);
+        }
+        break;
+    }
+    case TRC_HW_IRQ_DIRECT_VECTOR:
+    {
+        struct {
+            uint32_t vec;
+            uint64_t handler;
+        } __attribute__((packed)) *r = (typeof(r))ri->d;
+
+        if ( opt.dump_all )
+        {
+            printf(" %s irq_direct, vec %x, handler = %p\n",
+                   ri->dump_header, r->vec, (void*)r->handler);
+        }
+        break;
+    }
+    case TRC_HW_IRQ_HANDLED:
+    {
+        struct {
+            int32_t irq;
+            uint64_t arctime;
+        } __attribute__((packed)) *r = (typeof(r))ri->d;
+
+        if ( opt.dump_all )
+        {
+            printf(" %s irq_handled irq %x, %"PRIu64" cycles\n",
+                   ri->dump_header, r->irq, r->arctime);
         }
         if ( opt.scatterplot_irq )
         {
@@ -8407,6 +8433,22 @@  void irq_process(struct pcpu_info *p) {
         }
         break;
     }
+    case TRC_HW_IRQ_EXIT:
+    {
+        struct {
+            int32_t irq, status;
+            uint32_t in_irq;
+        } *r = (typeof(r))ri->d;
+
+        if ( opt.dump_all )
+        {
+            printf(" %s irq_exit irq %x", ri->dump_header, r->irq);
+            if ( r->status != -1 )
+                printf(", status = 0x%x", r->status);
+            printf(", in_irq = %d\n", r->in_irq);
+        }
+        break;
+    }
     case TRC_HW_IRQ_ASSIGN_VECTOR:
     {
         struct {
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index 676ba52..f5d9302 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -100,11 +100,15 @@  static void trace_irq_mask(u32 event, int irq, int vector, cpumask_t *mask)
         unsigned int irq:16, vec:16;
         unsigned int mask[6];
     } d;
+
+    if ( likely(!tb_init_done) )
+        return;
+
     d.irq = irq;
     d.vec = vector;
     memset(d.mask, 0, sizeof(d.mask));
     memcpy(d.mask, mask, min(sizeof(d.mask), sizeof(cpumask_t)));
-    trace_var(event, 1, sizeof(d), &d);
+    __trace_var(event, 1, sizeof(d), &d);
 }
 
 static int __init __bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
@@ -804,23 +808,54 @@  void alloc_direct_apic_vector(
     spin_unlock(&lock);
 }
 
+static inline void trace_irq_handled(int irq, uint64_t time)
+{
+    struct __packed {
+        int32_t irq;
+        uint64_t time;
+    } d;
+
+    if ( likely(!tb_init_done) )
+        return;
+
+    d.irq = irq;
+    d.time = time;
+    __trace_var(TRC_HW_IRQ_HANDLED, 0, sizeof(d), &d);
+}
+
 void do_IRQ(struct cpu_user_regs *regs)
 {
     struct irqaction *action;
-    uint32_t          tsc_in;
-    struct irq_desc  *desc;
+    uint64_t          tsc_in = 0;
+    struct irq_desc  *desc = NULL;
     unsigned int      vector = (u8)regs->entry_vector;
     int irq = __get_cpu_var(vector_irq[vector]);
     struct cpu_user_regs *old_regs = set_irq_regs(regs);
     
     perfc_incr(irqs);
     this_cpu(irq_count)++;
+    TRACE_1D(TRC_HW_IRQ_ENTER, irq);
     irq_enter();
 
-    if (irq < 0) {
-        if (direct_apic_vector[vector] != NULL) {
+    if (irq < 0)
+    {
+        if (direct_apic_vector[vector] != NULL)
+        {
+            if ( unlikely(tb_init_done) )
+            {
+                struct __packed {
+                    uint32_t vec;
+                    uint64_t handler;
+                } d;
+
+                d.vec = vector;
+                d.handler = (uint64_t)direct_apic_vector[vector];
+                __trace_var(TRC_HW_IRQ_DIRECT_VECTOR, 0, sizeof(d), &d);
+            }
             (*direct_apic_vector[vector])(regs);
-        } else {
+        }
+        else
+        {
             const char *kind = ", LAPIC";
 
             if ( apic_isr_read(vector) )
@@ -884,9 +919,13 @@  void do_IRQ(struct cpu_user_regs *regs)
             desc->rl_quantum_start = now;
         }
 
-        tsc_in = tb_init_done ? get_cycles() : 0;
+        if ( unlikely(tb_init_done) )
+        {
+            __trace_var(TRC_HW_IRQ_GUEST, 0, sizeof(irq), &irq);
+            tsc_in = get_cycles();
+        }
         __do_IRQ_guest(irq);
-        TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
+        trace_irq_handled(irq, get_cycles() - tsc_in);
         goto out_no_end;
     }
 
@@ -907,9 +946,10 @@  void do_IRQ(struct cpu_user_regs *regs)
     {
         desc->status &= ~IRQ_PENDING;
         spin_unlock_irq(&desc->lock);
-        tsc_in = tb_init_done ? get_cycles() : 0;
+        if ( unlikely(tb_init_done) )
+            tsc_in = get_cycles();
         action->handler(irq, action->dev_id, regs);
-        TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
+        trace_irq_handled(irq, get_cycles() - tsc_in);
         spin_lock_irq(&desc->lock);
     }
 
@@ -922,6 +962,7 @@  void do_IRQ(struct cpu_user_regs *regs)
     spin_unlock(&desc->lock);
  out_no_unlock:
     irq_exit();
+    TRACE_3D(TRC_HW_IRQ_EXIT, irq, desc == NULL ? -1 : desc->status, in_irq());
     set_irq_regs(old_regs);
 }
 
diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index 7f2e891..f66a7af 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -271,6 +271,10 @@ 
 #define TRC_HW_IRQ_ASSIGN_VECTOR      (TRC_HW_IRQ + 0x6)
 #define TRC_HW_IRQ_UNMAPPED_VECTOR    (TRC_HW_IRQ + 0x7)
 #define TRC_HW_IRQ_HANDLED            (TRC_HW_IRQ + 0x8)
+#define TRC_HW_IRQ_DIRECT_VECTOR      (TRC_HW_IRQ + 0x9)
+#define TRC_HW_IRQ_ENTER              (TRC_HW_IRQ + 0xA)
+#define TRC_HW_IRQ_GUEST              (TRC_HW_IRQ + 0xB)
+#define TRC_HW_IRQ_EXIT               (TRC_HW_IRQ + 0xC)
 
 /*
  * Event Flags