diff mbox

[v2,03/10] xen: credit2: make tickling more deterministic

Message ID 147520401933.22544.13337692634316589872.stgit@Solace.fritz.box (mailing list archive)
State New, archived
Headers show

Commit Message

Dario Faggioli Sept. 30, 2016, 2:53 a.m. UTC
Right now, the following scenario can occur:
 - upon vcpu v wakeup, v itself is put in the runqueue,
   and pcpu X is tickled;
 - pcpu Y schedules (for whatever reason), sees v in
   the runqueue and picks it up.

This may seem ok (or even a good thing), but it's not.
In fact, if runq_tickle() decided X is where v should
run, it did it for a reason (load distribution, SMT
support, cache hotness, affinity, etc), and we really
should try as hard as possible to stick to that.

Of course, we can't be too strict, or we risk leaving
vcpus in the runqueue while there is available CPU
capacity. So, we only leave v in runqueue --for X to
pick it up-- if we see that X has been tickled and
has not scheduled yet, i.e., it will have a real chance
of actually selecting and scheduling v.

If that is not the case, we schedule it on Y (or, at
least, we consider that), as running somewhere non-ideal
is better than not running at all.

The commit also adds performance counters for each of
the possible situations.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@citrix.com>
Cc: Anshul Makkar <anshul.makkar@citrix.com>
Cc: Jan Beulich <JBeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
Changes from v1:
 * always initialize tickled_cpu to -1, also for idle vcpus (in which case it
   simply never changes to anything else), for improved readability
   and understandability;
 * logic for reporting back to csched_schedule() whether any vcpu was skipped,
   within runq_candidate(), and to only reset the credits if that did not
   happen moved out from here, to another patch.
---
 xen/common/sched_credit2.c   |   37 ++++++++++++++++++++++++++++++++++++-
 xen/include/xen/perfc_defn.h |    3 +++
 2 files changed, 39 insertions(+), 1 deletion(-)

Comments

George Dunlap Sept. 30, 2016, 11:25 a.m. UTC | #1
On 30/09/16 03:53, Dario Faggioli wrote:
> Right now, the following scenario can occurr:
>  - upon vcpu v wakeup, v itself is put in the runqueue,
>    and pcpu X is tickled;
>  - pcpu Y schedules (for whatever reason), sees v in
>    the runqueue and picks it up.
> 
> This may seem ok (or even a good thing), but it's not.
> In fact, if runq_tickle() decided X is where v should
> run, it did it for a reason (load distribution, SMT
> support, cache hotness, affinity, etc), and we really
> should try as hard as possible to stick to that.
> 
> Of course, we can't be too strict, or we risk leaving
> vcpus in the runqueue while there is available CPU
> capacity. So, we only leave v in runqueue --for X to
> pick it up-- if we see that X has been tickled and
> has not scheduled yet, i.e., it will have a real chance
> of actually select and schedule v.
> 
> If that is not the case, we schedule it on Y (or, at
> least, we consider that), as running somewhere non-ideal
> is better than not running at all.
> 
> The commit also adds performance counters for each of
> the possible situations.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

Reviewed-by: George Dunlap <george.dunlap@citrix.com>

> ---
> Cc: George Dunlap <george.dunlap@citrix.com>
> Cc: Anshul Makkar <anshul.makkar@citrix.com>
> Cc: Jan Beulich <JBeulich@suse.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> Changes from v1:
>  * always initialize tickled_cpu to -1, also for idle vcpus (in which cases, it
>    just won't ever change to anything else than that), for improved readability
>    and understandability;
>  * logic for reporting back to csched_schedule() whether any vcpu was skipped,
>    within runq_candidate(), and to only reset the credits if that did not
>    happen moved out from here, to another patch.
> ---
>  xen/common/sched_credit2.c   |   37 ++++++++++++++++++++++++++++++++++++-
>  xen/include/xen/perfc_defn.h |    3 +++
>  2 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 5c7d0dc..3986441 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -54,6 +54,7 @@
>  #define TRC_CSCHED2_LOAD_CHECK       TRC_SCHED_CLASS_EVT(CSCHED2, 16)
>  #define TRC_CSCHED2_LOAD_BALANCE     TRC_SCHED_CLASS_EVT(CSCHED2, 17)
>  #define TRC_CSCHED2_PICKED_CPU       TRC_SCHED_CLASS_EVT(CSCHED2, 19)
> +#define TRC_CSCHED2_RUNQ_CANDIDATE   TRC_SCHED_CLASS_EVT(CSCHED2, 20)
>  
>  /*
>   * WARNING: This is still in an experimental phase.  Status and work can be found at the
> @@ -398,6 +399,7 @@ struct csched2_vcpu {
>      int credit;
>      s_time_t start_time; /* When we were scheduled (used for credit) */
>      unsigned flags;      /* 16 bits doesn't seem to play well with clear_bit() */
> +    int tickled_cpu;     /* cpu tickled for picking us up (-1 if none) */
>  
>      /* Individual contribution to load */
>      s_time_t load_last_update;  /* Last time average was updated */
> @@ -1049,6 +1051,10 @@ runq_tickle(const struct scheduler *ops, struct csched2_vcpu *new, s_time_t now)
>      __cpumask_set_cpu(ipid, &rqd->tickled);
>      smt_idle_mask_clear(ipid, &rqd->smt_idle);
>      cpu_raise_softirq(ipid, SCHEDULE_SOFTIRQ);
> +
> +    if ( unlikely(new->tickled_cpu != -1) )
> +        SCHED_STAT_CRANK(tickled_cpu_overwritten);
> +    new->tickled_cpu = ipid;
>  }
>  
>  /*
> @@ -1276,6 +1282,7 @@ csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
>          svc->credit = CSCHED2_IDLE_CREDIT;
>          svc->weight = 0;
>      }
> +    svc->tickled_cpu = -1;
>  
>      SCHED_STAT_CRANK(vcpu_alloc);
>  
> @@ -2268,6 +2275,17 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>          if ( !cpumask_test_cpu(cpu, svc->vcpu->cpu_hard_affinity) )
>              continue;
>  
> +        /*
> +         * If a vcpu is meant to be picked up by another processor, and such
> +         * processor has not scheduled yet, leave it in the runqueue for him.
> +         */
> +        if ( svc->tickled_cpu != -1 && svc->tickled_cpu != cpu &&
> +             cpumask_test_cpu(svc->tickled_cpu, &rqd->tickled) )
> +        {
> +            SCHED_STAT_CRANK(deferred_to_tickled_cpu);
> +            continue;
> +        }
> +
>          /* If this is on a different processor, don't pull it unless
>           * its credit is at least CSCHED2_MIGRATE_RESIST higher. */
>          if ( svc->vcpu->processor != cpu
> @@ -2284,9 +2302,25 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>  
>          /* In any case, if we got this far, break. */
>          break;
> +    }
>  
> +    if ( unlikely(tb_init_done) )
> +    {
> +        struct {
> +            unsigned vcpu:16, dom:16;
> +            unsigned tickled_cpu;
> +        } d;
> +        d.dom = snext->vcpu->domain->domain_id;
> +        d.vcpu = snext->vcpu->vcpu_id;
> +        d.tickled_cpu = snext->tickled_cpu;
> +        __trace_var(TRC_CSCHED2_RUNQ_CANDIDATE, 1,
> +                    sizeof(d),
> +                    (unsigned char *)&d);
>      }
>  
> +    if ( unlikely(snext->tickled_cpu != -1 && snext->tickled_cpu != cpu) )
> +        SCHED_STAT_CRANK(tickled_cpu_overridden);
> +
>      return snext;
>  }
>  
> @@ -2351,7 +2385,7 @@ csched2_schedule(
>          snext = CSCHED2_VCPU(idle_vcpu[cpu]);
>      }
>      else
> -        snext=runq_candidate(rqd, scurr, cpu, now);
> +        snext = runq_candidate(rqd, scurr, cpu, now);
>  
>      /* If switching from a non-idle runnable vcpu, put it
>       * back on the runqueue. */
> @@ -2390,6 +2424,7 @@ csched2_schedule(
>          }
>  
>          snext->start_time = now;
> +        snext->tickled_cpu = -1;
>  
>          /* Safe because lock for old processor is held */
>          if ( snext->vcpu->processor != cpu )
> diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h
> index a336c71..4a835b8 100644
> --- a/xen/include/xen/perfc_defn.h
> +++ b/xen/include/xen/perfc_defn.h
> @@ -66,6 +66,9 @@ PERFCOUNTER(runtime_max_timer,      "csched2: runtime_max_timer")
>  PERFCOUNTER(migrated,               "csched2: migrated")
>  PERFCOUNTER(migrate_resisted,       "csched2: migrate_resisted")
>  PERFCOUNTER(credit_reset,           "csched2: credit_reset")
> +PERFCOUNTER(deferred_to_tickled_cpu,"csched2: deferred_to_tickled_cpu")
> +PERFCOUNTER(tickled_cpu_overwritten,"csched2: tickled_cpu_overwritten")
> +PERFCOUNTER(tickled_cpu_overridden, "csched2: tickled_cpu_overridden")
>  
>  PERFCOUNTER(need_flush_tlb_flush,   "PG_need_flush tlb flushes")
>  
>
diff mbox

Patch

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 5c7d0dc..3986441 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -54,6 +54,7 @@ 
 #define TRC_CSCHED2_LOAD_CHECK       TRC_SCHED_CLASS_EVT(CSCHED2, 16)
 #define TRC_CSCHED2_LOAD_BALANCE     TRC_SCHED_CLASS_EVT(CSCHED2, 17)
 #define TRC_CSCHED2_PICKED_CPU       TRC_SCHED_CLASS_EVT(CSCHED2, 19)
+#define TRC_CSCHED2_RUNQ_CANDIDATE   TRC_SCHED_CLASS_EVT(CSCHED2, 20)
 
 /*
  * WARNING: This is still in an experimental phase.  Status and work can be found at the
@@ -398,6 +399,7 @@  struct csched2_vcpu {
     int credit;
     s_time_t start_time; /* When we were scheduled (used for credit) */
     unsigned flags;      /* 16 bits doesn't seem to play well with clear_bit() */
+    int tickled_cpu;     /* cpu tickled for picking us up (-1 if none) */
 
     /* Individual contribution to load */
     s_time_t load_last_update;  /* Last time average was updated */
@@ -1049,6 +1051,10 @@  runq_tickle(const struct scheduler *ops, struct csched2_vcpu *new, s_time_t now)
     __cpumask_set_cpu(ipid, &rqd->tickled);
     smt_idle_mask_clear(ipid, &rqd->smt_idle);
     cpu_raise_softirq(ipid, SCHEDULE_SOFTIRQ);
+
+    if ( unlikely(new->tickled_cpu != -1) )
+        SCHED_STAT_CRANK(tickled_cpu_overwritten);
+    new->tickled_cpu = ipid;
 }
 
 /*
@@ -1276,6 +1282,7 @@  csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
         svc->credit = CSCHED2_IDLE_CREDIT;
         svc->weight = 0;
     }
+    svc->tickled_cpu = -1;
 
     SCHED_STAT_CRANK(vcpu_alloc);
 
@@ -2268,6 +2275,17 @@  runq_candidate(struct csched2_runqueue_data *rqd,
         if ( !cpumask_test_cpu(cpu, svc->vcpu->cpu_hard_affinity) )
             continue;
 
+        /*
+         * If a vcpu is meant to be picked up by another processor, and such
+         * processor has not scheduled yet, leave it in the runqueue for him.
+         */
+        if ( svc->tickled_cpu != -1 && svc->tickled_cpu != cpu &&
+             cpumask_test_cpu(svc->tickled_cpu, &rqd->tickled) )
+        {
+            SCHED_STAT_CRANK(deferred_to_tickled_cpu);
+            continue;
+        }
+
         /* If this is on a different processor, don't pull it unless
          * its credit is at least CSCHED2_MIGRATE_RESIST higher. */
         if ( svc->vcpu->processor != cpu
@@ -2284,9 +2302,25 @@  runq_candidate(struct csched2_runqueue_data *rqd,
 
         /* In any case, if we got this far, break. */
         break;
+    }
 
+    if ( unlikely(tb_init_done) )
+    {
+        struct {
+            unsigned vcpu:16, dom:16;
+            unsigned tickled_cpu;
+        } d;
+        d.dom = snext->vcpu->domain->domain_id;
+        d.vcpu = snext->vcpu->vcpu_id;
+        d.tickled_cpu = snext->tickled_cpu;
+        __trace_var(TRC_CSCHED2_RUNQ_CANDIDATE, 1,
+                    sizeof(d),
+                    (unsigned char *)&d);
     }
 
+    if ( unlikely(snext->tickled_cpu != -1 && snext->tickled_cpu != cpu) )
+        SCHED_STAT_CRANK(tickled_cpu_overridden);
+
     return snext;
 }
 
@@ -2351,7 +2385,7 @@  csched2_schedule(
         snext = CSCHED2_VCPU(idle_vcpu[cpu]);
     }
     else
-        snext=runq_candidate(rqd, scurr, cpu, now);
+        snext = runq_candidate(rqd, scurr, cpu, now);
 
     /* If switching from a non-idle runnable vcpu, put it
      * back on the runqueue. */
@@ -2390,6 +2424,7 @@  csched2_schedule(
         }
 
         snext->start_time = now;
+        snext->tickled_cpu = -1;
 
         /* Safe because lock for old processor is held */
         if ( snext->vcpu->processor != cpu )
diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h
index a336c71..4a835b8 100644
--- a/xen/include/xen/perfc_defn.h
+++ b/xen/include/xen/perfc_defn.h
@@ -66,6 +66,9 @@  PERFCOUNTER(runtime_max_timer,      "csched2: runtime_max_timer")
 PERFCOUNTER(migrated,               "csched2: migrated")
 PERFCOUNTER(migrate_resisted,       "csched2: migrate_resisted")
 PERFCOUNTER(credit_reset,           "csched2: credit_reset")
+PERFCOUNTER(deferred_to_tickled_cpu,"csched2: deferred_to_tickled_cpu")
+PERFCOUNTER(tickled_cpu_overwritten,"csched2: tickled_cpu_overwritten")
+PERFCOUNTER(tickled_cpu_overridden, "csched2: tickled_cpu_overridden")
 
 PERFCOUNTER(need_flush_tlb_flush,   "PG_need_flush tlb flushes")