diff mbox

[v2,06/10] xen: tracing: add trace records for schedule and rate-limiting.

Message ID 147520404038.22544.12557551024149852193.stgit@Solace.fritz.box (mailing list archive)
State New, archived
Headers show

Commit Message

Dario Faggioli Sept. 30, 2016, 2:54 a.m. UTC
As far as {csched, csched2, rt}_schedule() are concerned,
an "empty" event, would already make it easier to read and
understand a trace.

But while there, add a few useful information, like
if the cpu that is going through the scheduler has
been tickled or not, if it is currently idle, etc
(they vary, on a per-scheduler basis).

For Credit1 and Credit2, add a record about when
rate-limiting kicks in too.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Meng Xu <mengxu@cis.upenn.edu>
Cc: Anshul Makkar <anshul.makkar@citrix.com>
---
Changes from v1:
 * corrected the schedule record for sched_rt.c, as pointed out during review;
 * pack the Credit1 records as well, as requested during review.
---
 xen/common/sched_credit.c  |   32 ++++++++++++++++++++++++++++++++
 xen/common/sched_credit2.c |   40 +++++++++++++++++++++++++++++++++++++++-
 xen/common/sched_rt.c      |   15 +++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)

Comments

George Dunlap Sept. 30, 2016, 1:16 p.m. UTC | #1
On 30/09/16 03:54, Dario Faggioli wrote:
> As far as {csched, csched2, rt}_schedule() are concerned,
> an "empty" event, would already make it easier to read and
> understand a trace.
> 
> But while there, add a few useful information, like
> if the cpu that is going through the scheduler has
> been tickled or not, if it is currently idle, etc
> (they vary, on a per-scheduler basis).
> 
> For Credit1 and Credit2, add a record about when
> rate-limiting kicks in too.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

Acked-by: George Dunlap <george.dunlap@citrix.com>

> ---
> Cc: George Dunlap <george.dunlap@eu.citrix.com>
> Cc: Meng Xu <mengxu@cis.upenn.edu>
> Cc: Anshul Makkar <anshul.makkar@citrix.com>
> ---
> Changes from v1:
>  * corrected the schedule record for sched_rt.c, as pointed out during review;
>  * pack the Credit1 records as well, as requested during review.
> ---
>  xen/common/sched_credit.c  |   32 ++++++++++++++++++++++++++++++++
>  xen/common/sched_credit2.c |   40 +++++++++++++++++++++++++++++++++++++++-
>  xen/common/sched_rt.c      |   15 +++++++++++++++
>  3 files changed, 86 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
> index 5700763..fc3a321 100644
> --- a/xen/common/sched_credit.c
> +++ b/xen/common/sched_credit.c
> @@ -133,6 +133,8 @@
>  #define TRC_CSCHED_TICKLE        TRC_SCHED_CLASS_EVT(CSCHED, 6)
>  #define TRC_CSCHED_BOOST_START   TRC_SCHED_CLASS_EVT(CSCHED, 7)
>  #define TRC_CSCHED_BOOST_END     TRC_SCHED_CLASS_EVT(CSCHED, 8)
> +#define TRC_CSCHED_SCHEDULE      TRC_SCHED_CLASS_EVT(CSCHED, 9)
> +#define TRC_CSCHED_RATELIMIT     TRC_SCHED_CLASS_EVT(CSCHED, 10)
>  
>  
>  /*
> @@ -1774,6 +1776,23 @@ csched_schedule(
>      SCHED_STAT_CRANK(schedule);
>      CSCHED_VCPU_CHECK(current);
>  
> +    /*
> +     * Here in Credit1 code, we usually just call TRACE_nD() helpers, and
> +     * don't care about packing. But scheduling happens very often, so it
> +     * actually is important that the record is as small as possible.
> +     */
> +    if ( unlikely(tb_init_done) )
> +    {
> +        struct {
> +            unsigned cpu:16, tasklet:8, idle:8;
> +        } d;
> +        d.cpu = cpu;
> +        d.tasklet = tasklet_work_scheduled;
> +        d.idle = is_idle_vcpu(current);
> +        __trace_var(TRC_CSCHED_SCHEDULE, 1, sizeof(d),
> +                    (unsigned char *)&d);
> +    }
> +
>      runtime = now - current->runstate.state_entry_time;
>      if ( runtime < 0 ) /* Does this ever happen? */
>          runtime = 0;
> @@ -1829,6 +1848,19 @@ csched_schedule(
>          tslice = MICROSECS(prv->ratelimit_us) - runtime;
>          if ( unlikely(runtime < CSCHED_MIN_TIMER) )
>              tslice = CSCHED_MIN_TIMER;
> +        if ( unlikely(tb_init_done) )
> +        {
> +            struct {
> +                unsigned vcpu:16, dom:16;
> +                unsigned runtime;
> +            } d;
> +            d.dom = scurr->vcpu->domain->domain_id;
> +            d.vcpu = scurr->vcpu->vcpu_id;
> +            d.runtime = runtime;
> +            __trace_var(TRC_CSCHED_RATELIMIT, 1, sizeof(d),
> +                        (unsigned char *)&d);
> +        }
> +
>          ret.migrated = 0;
>          goto out;
>      }
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index fde61ef..5cf3f16 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -55,6 +55,8 @@
>  #define TRC_CSCHED2_LOAD_BALANCE     TRC_SCHED_CLASS_EVT(CSCHED2, 17)
>  #define TRC_CSCHED2_PICKED_CPU       TRC_SCHED_CLASS_EVT(CSCHED2, 19)
>  #define TRC_CSCHED2_RUNQ_CANDIDATE   TRC_SCHED_CLASS_EVT(CSCHED2, 20)
> +#define TRC_CSCHED2_SCHEDULE         TRC_SCHED_CLASS_EVT(CSCHED2, 21)
> +#define TRC_CSCHED2_RATELIMIT        TRC_SCHED_CLASS_EVT(CSCHED2, 22)
>  
>  /*
>   * WARNING: This is still in an experimental phase.  Status and work can be found at the
> @@ -2288,12 +2290,29 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>       * no point forcing it to do so until rate limiting expires.
>       */
>      if ( __test_and_clear_bit(__CSFLAG_vcpu_yield, &scurr->flags) )
> +    {
>          yield_bias = CSCHED2_YIELD_BIAS;
> +    }
>      else if ( prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
>                vcpu_runnable(scurr->vcpu) &&
>                (now - scurr->vcpu->runstate.state_entry_time) <
>                 MICROSECS(prv->ratelimit_us) )
> +    {
> +        if ( unlikely(tb_init_done) )
> +        {
> +            struct {
> +                unsigned vcpu:16, dom:16;
> +                unsigned runtime;
> +            } d;
> +            d.dom = scurr->vcpu->domain->domain_id;
> +            d.vcpu = scurr->vcpu->vcpu_id;
> +            d.runtime = now - scurr->vcpu->runstate.state_entry_time;
> +            __trace_var(TRC_CSCHED2_RATELIMIT, 1,
> +                        sizeof(d),
> +                        (unsigned char *)&d);
> +        }
>          return scurr;
> +    }
>  
>      /* Default to current if runnable, idle otherwise */
>      if ( vcpu_runnable(scurr->vcpu) )
> @@ -2383,6 +2402,7 @@ csched2_schedule(
>      struct csched2_vcpu *snext = NULL;
>      unsigned int skipped_vcpus = 0;
>      struct task_slice ret;
> +    bool_t tickled;
>  
>      SCHED_STAT_CRANK(schedule);
>      CSCHED2_VCPU_CHECK(current);
> @@ -2397,13 +2417,31 @@ csched2_schedule(
>      BUG_ON(!is_idle_vcpu(scurr->vcpu) && scurr->rqd != rqd);
>  
>      /* Clear "tickled" bit now that we've been scheduled */
> -    if ( cpumask_test_cpu(cpu, &rqd->tickled) )
> +    tickled = cpumask_test_cpu(cpu, &rqd->tickled);
> +    if ( tickled )
>      {
>          __cpumask_clear_cpu(cpu, &rqd->tickled);
>          cpumask_andnot(cpumask_scratch, &rqd->idle, &rqd->tickled);
>          smt_idle_mask_set(cpu, cpumask_scratch, &rqd->smt_idle);
>      }
>  
> +    if ( unlikely(tb_init_done) )
> +    {
> +        struct {
> +            unsigned cpu:16, rq_id:16;
> +            unsigned tasklet:8, idle:8, smt_idle:8, tickled:8;
> +        } d;
> +        d.cpu = cpu;
> +        d.rq_id = c2r(ops, cpu);
> +        d.tasklet = tasklet_work_scheduled;
> +        d.idle = is_idle_vcpu(current);
> +        d.smt_idle = cpumask_test_cpu(cpu, &rqd->smt_idle);
> +        d.tickled = tickled;
> +        __trace_var(TRC_CSCHED2_SCHEDULE, 1,
> +                    sizeof(d),
> +                    (unsigned char *)&d);
> +    }
> +
>      /* Update credits */
>      burn_credits(rqd, scurr, now);
>  
> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 41c61a7..d95f798 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -160,6 +160,7 @@
>  #define TRC_RTDS_BUDGET_BURN      TRC_SCHED_CLASS_EVT(RTDS, 3)
>  #define TRC_RTDS_BUDGET_REPLENISH TRC_SCHED_CLASS_EVT(RTDS, 4)
>  #define TRC_RTDS_SCHED_TASKLET    TRC_SCHED_CLASS_EVT(RTDS, 5)
> +#define TRC_RTDS_SCHEDULE         TRC_SCHED_CLASS_EVT(RTDS, 6)
>  
>  static void repl_timer_handler(void *data);
>  
> @@ -1035,6 +1036,20 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>      struct rt_vcpu *snext = NULL;
>      struct task_slice ret = { .migrated = 0 };
>  
> +    /* TRACE */
> +    {
> +        struct __packed {
> +            unsigned cpu:16, tasklet:8, tickled:4, idle:4;
> +        } d;
> +        d.cpu = cpu;
> +        d.tasklet = tasklet_work_scheduled;
> +        d.tickled = cpumask_test_cpu(cpu, &prv->tickled);
> +        d.idle = is_idle_vcpu(current);
> +        trace_var(TRC_RTDS_SCHEDULE, 1,
> +                  sizeof(d),
> +                  (unsigned char *)&d);
> +    }
> +
>      /* clear ticked bit now that we've been scheduled */
>      cpumask_clear_cpu(cpu, &prv->tickled);
>  
>
Meng Xu Oct. 1, 2016, 12:18 a.m. UTC | #2
On Thu, Sep 29, 2016 at 10:54 PM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> As far as {csched, csched2, rt}_schedule() are concerned,
> an "empty" event, would already make it easier to read and
> understand a trace.
>
> But while there, add a few useful information, like
> if the cpu that is going through the scheduler has
> been tickled or not, if it is currently idle, etc
> (they vary, on a per-scheduler basis).
>
> For Credit1 and Credit2, add a record about when
> rate-limiting kicks in too.
>
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> ---
> Cc: George Dunlap <george.dunlap@eu.citrix.com>
> Cc: Meng Xu <mengxu@cis.upenn.edu>
> Cc: Anshul Makkar <anshul.makkar@citrix.com>
> ---
> Changes from v1:
>  * corrected the schedule record for sched_rt.c, as pointed out during review;
>  * pack the Credit1 records as well, as requested during review.
> ---
>  xen/common/sched_credit.c  |   32 ++++++++++++++++++++++++++++++++
>  xen/common/sched_credit2.c |   40 +++++++++++++++++++++++++++++++++++++++-
>  xen/common/sched_rt.c      |   15 +++++++++++++++
>  3 files changed, 86 insertions(+), 1 deletion(-)
>

As to sched_rt.c,
Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>

Thanks,

Meng

-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/
diff mbox

Patch

diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 5700763..fc3a321 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -133,6 +133,8 @@ 
 #define TRC_CSCHED_TICKLE        TRC_SCHED_CLASS_EVT(CSCHED, 6)
 #define TRC_CSCHED_BOOST_START   TRC_SCHED_CLASS_EVT(CSCHED, 7)
 #define TRC_CSCHED_BOOST_END     TRC_SCHED_CLASS_EVT(CSCHED, 8)
+#define TRC_CSCHED_SCHEDULE      TRC_SCHED_CLASS_EVT(CSCHED, 9)
+#define TRC_CSCHED_RATELIMIT     TRC_SCHED_CLASS_EVT(CSCHED, 10)
 
 
 /*
@@ -1774,6 +1776,23 @@  csched_schedule(
     SCHED_STAT_CRANK(schedule);
     CSCHED_VCPU_CHECK(current);
 
+    /*
+     * Here in Credit1 code, we usually just call TRACE_nD() helpers, and
+     * don't care about packing. But scheduling happens very often, so it
+     * actually is important that the record is as small as possible.
+     */
+    if ( unlikely(tb_init_done) )
+    {
+        struct {
+            unsigned cpu:16, tasklet:8, idle:8;
+        } d;
+        d.cpu = cpu;
+        d.tasklet = tasklet_work_scheduled;
+        d.idle = is_idle_vcpu(current);
+        __trace_var(TRC_CSCHED_SCHEDULE, 1, sizeof(d),
+                    (unsigned char *)&d);
+    }
+
     runtime = now - current->runstate.state_entry_time;
     if ( runtime < 0 ) /* Does this ever happen? */
         runtime = 0;
@@ -1829,6 +1848,19 @@  csched_schedule(
         tslice = MICROSECS(prv->ratelimit_us) - runtime;
         if ( unlikely(runtime < CSCHED_MIN_TIMER) )
             tslice = CSCHED_MIN_TIMER;
+        if ( unlikely(tb_init_done) )
+        {
+            struct {
+                unsigned vcpu:16, dom:16;
+                unsigned runtime;
+            } d;
+            d.dom = scurr->vcpu->domain->domain_id;
+            d.vcpu = scurr->vcpu->vcpu_id;
+            d.runtime = runtime;
+            __trace_var(TRC_CSCHED_RATELIMIT, 1, sizeof(d),
+                        (unsigned char *)&d);
+        }
+
         ret.migrated = 0;
         goto out;
     }
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index fde61ef..5cf3f16 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -55,6 +55,8 @@ 
 #define TRC_CSCHED2_LOAD_BALANCE     TRC_SCHED_CLASS_EVT(CSCHED2, 17)
 #define TRC_CSCHED2_PICKED_CPU       TRC_SCHED_CLASS_EVT(CSCHED2, 19)
 #define TRC_CSCHED2_RUNQ_CANDIDATE   TRC_SCHED_CLASS_EVT(CSCHED2, 20)
+#define TRC_CSCHED2_SCHEDULE         TRC_SCHED_CLASS_EVT(CSCHED2, 21)
+#define TRC_CSCHED2_RATELIMIT        TRC_SCHED_CLASS_EVT(CSCHED2, 22)
 
 /*
  * WARNING: This is still in an experimental phase.  Status and work can be found at the
@@ -2288,12 +2290,29 @@  runq_candidate(struct csched2_runqueue_data *rqd,
      * no point forcing it to do so until rate limiting expires.
      */
     if ( __test_and_clear_bit(__CSFLAG_vcpu_yield, &scurr->flags) )
+    {
         yield_bias = CSCHED2_YIELD_BIAS;
+    }
     else if ( prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
               vcpu_runnable(scurr->vcpu) &&
               (now - scurr->vcpu->runstate.state_entry_time) <
                MICROSECS(prv->ratelimit_us) )
+    {
+        if ( unlikely(tb_init_done) )
+        {
+            struct {
+                unsigned vcpu:16, dom:16;
+                unsigned runtime;
+            } d;
+            d.dom = scurr->vcpu->domain->domain_id;
+            d.vcpu = scurr->vcpu->vcpu_id;
+            d.runtime = now - scurr->vcpu->runstate.state_entry_time;
+            __trace_var(TRC_CSCHED2_RATELIMIT, 1,
+                        sizeof(d),
+                        (unsigned char *)&d);
+        }
         return scurr;
+    }
 
     /* Default to current if runnable, idle otherwise */
     if ( vcpu_runnable(scurr->vcpu) )
@@ -2383,6 +2402,7 @@  csched2_schedule(
     struct csched2_vcpu *snext = NULL;
     unsigned int skipped_vcpus = 0;
     struct task_slice ret;
+    bool_t tickled;
 
     SCHED_STAT_CRANK(schedule);
     CSCHED2_VCPU_CHECK(current);
@@ -2397,13 +2417,31 @@  csched2_schedule(
     BUG_ON(!is_idle_vcpu(scurr->vcpu) && scurr->rqd != rqd);
 
     /* Clear "tickled" bit now that we've been scheduled */
-    if ( cpumask_test_cpu(cpu, &rqd->tickled) )
+    tickled = cpumask_test_cpu(cpu, &rqd->tickled);
+    if ( tickled )
     {
         __cpumask_clear_cpu(cpu, &rqd->tickled);
         cpumask_andnot(cpumask_scratch, &rqd->idle, &rqd->tickled);
         smt_idle_mask_set(cpu, cpumask_scratch, &rqd->smt_idle);
     }
 
+    if ( unlikely(tb_init_done) )
+    {
+        struct {
+            unsigned cpu:16, rq_id:16;
+            unsigned tasklet:8, idle:8, smt_idle:8, tickled:8;
+        } d;
+        d.cpu = cpu;
+        d.rq_id = c2r(ops, cpu);
+        d.tasklet = tasklet_work_scheduled;
+        d.idle = is_idle_vcpu(current);
+        d.smt_idle = cpumask_test_cpu(cpu, &rqd->smt_idle);
+        d.tickled = tickled;
+        __trace_var(TRC_CSCHED2_SCHEDULE, 1,
+                    sizeof(d),
+                    (unsigned char *)&d);
+    }
+
     /* Update credits */
     burn_credits(rqd, scurr, now);
 
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 41c61a7..d95f798 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -160,6 +160,7 @@ 
 #define TRC_RTDS_BUDGET_BURN      TRC_SCHED_CLASS_EVT(RTDS, 3)
 #define TRC_RTDS_BUDGET_REPLENISH TRC_SCHED_CLASS_EVT(RTDS, 4)
 #define TRC_RTDS_SCHED_TASKLET    TRC_SCHED_CLASS_EVT(RTDS, 5)
+#define TRC_RTDS_SCHEDULE         TRC_SCHED_CLASS_EVT(RTDS, 6)
 
 static void repl_timer_handler(void *data);
 
@@ -1035,6 +1036,20 @@  rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
     struct rt_vcpu *snext = NULL;
     struct task_slice ret = { .migrated = 0 };
 
+    /* TRACE */
+    {
+        struct __packed {
+            unsigned cpu:16, tasklet:8, tickled:4, idle:4;
+        } d;
+        d.cpu = cpu;
+        d.tasklet = tasklet_work_scheduled;
+        d.tickled = cpumask_test_cpu(cpu, &prv->tickled);
+        d.idle = is_idle_vcpu(current);
+        trace_var(TRC_RTDS_SCHEDULE, 1,
+                  sizeof(d),
+                  (unsigned char *)&d);
+    }
+
     /* clear ticked bit now that we've been scheduled */
     cpumask_clear_cpu(cpu, &prv->tickled);