@@ -295,63 +295,6 @@ static int __read_mostly opt_overload_balance_tolerance = -3;
integer_param("credit2_balance_over", opt_overload_balance_tolerance);
/*
- * Runqueue organization.
- *
- * The various cpus are to be assigned each one to a runqueue, and we
- * want that to happen basing on topology. At the moment, it is possible
- * to choose to arrange runqueues to be:
- *
- * - per-core: meaning that there will be one runqueue per each physical
- * core of the host. This will happen if the opt_runqueue
- * parameter is set to 'core';
- *
- * - per-socket: meaning that there will be one runqueue per each physical
- * socket (AKA package, which often, but not always, also
- * matches a NUMA node) of the host; This will happen if
- * the opt_runqueue parameter is set to 'socket';
- *
- * - per-node: meaning that there will be one runqueue per each physical
- * NUMA node of the host. This will happen if the opt_runqueue
- * parameter is set to 'node';
- *
- * - global: meaning that there will be only one runqueue to which all the
- * (logical) processors of the host belong. This will happen if
- * the opt_runqueue parameter is set to 'all'.
- *
- * Depending on the value of opt_runqueue, therefore, cpus that are part of
- * either the same physical core, the same physical socket, the same NUMA
- * node, or just all of them, will be put together to form runqueues.
- */
-#define OPT_RUNQUEUE_CORE 0
-#define OPT_RUNQUEUE_SOCKET 1
-#define OPT_RUNQUEUE_NODE 2
-#define OPT_RUNQUEUE_ALL 3
-static const char *const opt_runqueue_str[] = {
- [OPT_RUNQUEUE_CORE] = "core",
- [OPT_RUNQUEUE_SOCKET] = "socket",
- [OPT_RUNQUEUE_NODE] = "node",
- [OPT_RUNQUEUE_ALL] = "all"
-};
-static int __read_mostly opt_runqueue = OPT_RUNQUEUE_SOCKET;
-
-static void parse_credit2_runqueue(const char *s)
-{
- unsigned int i;
-
- for ( i = 0; i < ARRAY_SIZE(opt_runqueue_str); i++ )
- {
- if ( !strcmp(s, opt_runqueue_str[i]) )
- {
- opt_runqueue = i;
- return;
- }
- }
-
- printk("WARNING, unrecognized value of credit2_runqueue option!\n");
-}
-custom_param("credit2_runqueue", parse_credit2_runqueue);
-
-/*
* Per-runqueue data
*/
struct csched2_runqueue_data {
@@ -563,45 +506,304 @@ static int get_fallback_cpu(struct csched2_vcpu *svc)
return cpumask_first(cpumask_scratch_cpu(cpu));
}
- ASSERT(!cpumask_empty(cpumask_scratch_cpu(cpu)));
-
- return cpumask_first(cpumask_scratch_cpu(cpu));
+ ASSERT(!cpumask_empty(cpumask_scratch_cpu(cpu)));
+
+ return cpumask_first(cpumask_scratch_cpu(cpu));
+}
+
+/*
+ * Time-to-credit, credit-to-time.
+ *
+ * We keep track of the "residual" time to make sure that frequent short
+ * schedules still get accounted for in the end.
+ *
+ * FIXME: Do pre-calculated division?
+ */
+static void t2c_update(struct csched2_runqueue_data *rqd, s_time_t time,
+ struct csched2_vcpu *svc)
+{
+ uint64_t val = time * rqd->max_weight + svc->residual;
+
+ svc->residual = do_div(val, svc->weight);
+ svc->credit -= val;
+}
+
+static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct csched2_vcpu *svc)
+{
+ return credit * svc->weight / rqd->max_weight;
+}
+
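As an aside, the effect of the residual can be seen with a standalone toy program (not part of this patch; charge_time() and all the numbers below are made up for illustration). Truncating the division on every short schedule can silently drop up to weight-1 units each time, whereas carrying the remainder forward, as t2c_update() does via do_div(), charges the exact total over time:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t charged, residual;

    /* Same shape as t2c_update(): turn a slice of time into credit to burn. */
    static void charge_time(uint64_t time, uint64_t max_weight, uint64_t weight)
    {
        uint64_t val = time * max_weight + residual;

        charged += val / weight;   /* what would be subtracted from svc->credit */
        residual = val % weight;   /* carried over to the next short schedule */
    }

    int main(void)
    {
        uint64_t naive = 0;
        unsigned int i;

        /* 1000 short slices of 333 time units, weight 140, max_weight 512. */
        for ( i = 0; i < 1000; i++ )
        {
            charge_time(333, 512, 140);
            naive += (333 * 512) / 140;           /* truncating every time */
        }

        /* Prints: with residual: 1217828, without: 1217000, exact: 1217828 */
        printf("with residual: %llu, without: %llu, exact: %llu\n",
               (unsigned long long)charged, (unsigned long long)naive,
               333ULL * 512 * 1000 / 140);
        return 0;
    }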
+/*
+ * Runqueue related code.
+ */
+
+/*
+ * Runqueue organization.
+ *
+ * Each cpu is assigned to exactly one runqueue, and we want that
+ * assignment to be based on topology. At the moment, it is possible
+ * to choose to arrange runqueues to be:
+ *
+ * - per-core: meaning that there will be one runqueue per each physical
+ * core of the host. This will happen if the opt_runqueue
+ * parameter is set to 'core';
+ *
+ * - per-socket: meaning that there will be one runqueue per each physical
+ * socket (AKA package, which often, but not always, also
+ * matches a NUMA node) of the host. This will happen if
+ * the opt_runqueue parameter is set to 'socket';
+ *
+ * - per-node: meaning that there will be one runqueue per each physical
+ * NUMA node of the host. This will happen if the opt_runqueue
+ * parameter is set to 'node';
+ *
+ * - global: meaning that there will be only one runqueue to which all the
+ * (logical) processors of the host belong. This will happen if
+ * the opt_runqueue parameter is set to 'all'.
+ *
+ * Depending on the value of opt_runqueue, therefore, cpus that are part of
+ * either the same physical core, the same physical socket, the same NUMA
+ * node, or just all of them, will be put together to form runqueues.
+ */
+#define OPT_RUNQUEUE_CORE 0
+#define OPT_RUNQUEUE_SOCKET 1
+#define OPT_RUNQUEUE_NODE 2
+#define OPT_RUNQUEUE_ALL 3
+static const char *const opt_runqueue_str[] = {
+ [OPT_RUNQUEUE_CORE] = "core",
+ [OPT_RUNQUEUE_SOCKET] = "socket",
+ [OPT_RUNQUEUE_NODE] = "node",
+ [OPT_RUNQUEUE_ALL] = "all"
+};
+static int __read_mostly opt_runqueue = OPT_RUNQUEUE_SOCKET;
+
+static void parse_credit2_runqueue(const char *s)
+{
+ unsigned int i;
+
+ for ( i = 0; i < ARRAY_SIZE(opt_runqueue_str); i++ )
+ {
+ if ( !strcmp(s, opt_runqueue_str[i]) )
+ {
+ opt_runqueue = i;
+ return;
+ }
+ }
+
+ printk("WARNING, unrecognized value of credit2_runqueue option!\n");
+}
+custom_param("credit2_runqueue", parse_credit2_runqueue);
+
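For reference (a usage note, not part of the patch): the parameter registered just above is consumed from the hypervisor command line, and with the table as defined here it accepts exactly the four strings in opt_runqueue_str, e.g.

    credit2_runqueue=node

Anything else triggers the warning printk and leaves opt_runqueue at its previous value, i.e. at the per-socket default unless an earlier, valid occurrence of the option changed it.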
+static inline int vcpu_on_runq(struct csched2_vcpu *svc)
+{
+ return !list_empty(&svc->runq_elem);
+}
+
+static struct csched2_vcpu * runq_elem(struct list_head *elem)
+{
+ return list_entry(elem, struct csched2_vcpu, runq_elem);
+}
+
+static void activate_runqueue(struct csched2_private *prv, int rqi)
+{
+ struct csched2_runqueue_data *rqd;
+
+ rqd = prv->rqd + rqi;
+
+ BUG_ON(!cpumask_empty(&rqd->active));
+
+ rqd->max_weight = 1;
+ rqd->id = rqi;
+ INIT_LIST_HEAD(&rqd->svc);
+ INIT_LIST_HEAD(&rqd->runq);
+ spin_lock_init(&rqd->lock);
+
+ __cpumask_set_cpu(rqi, &prv->active_queues);
+}
+
+static void deactivate_runqueue(struct csched2_private *prv, int rqi)
+{
+ struct csched2_runqueue_data *rqd;
+
+ rqd = prv->rqd + rqi;
+
+ BUG_ON(!cpumask_empty(&rqd->active));
+
+ rqd->id = -1;
+
+ __cpumask_clear_cpu(rqi, &prv->active_queues);
+}
+
+static inline bool_t same_node(unsigned int cpua, unsigned int cpub)
+{
+ return cpu_to_node(cpua) == cpu_to_node(cpub);
+}
+
+static inline bool_t same_socket(unsigned int cpua, unsigned int cpub)
+{
+ return cpu_to_socket(cpua) == cpu_to_socket(cpub);
+}
+
+static inline bool_t same_core(unsigned int cpua, unsigned int cpub)
+{
+ return same_socket(cpua, cpub) &&
+ cpu_to_core(cpua) == cpu_to_core(cpub);
+}
+
+static unsigned int
+cpu_to_runqueue(struct csched2_private *prv, unsigned int cpu)
+{
+ struct csched2_runqueue_data *rqd;
+ unsigned int rqi;
+
+ for ( rqi = 0; rqi < nr_cpu_ids; rqi++ )
+ {
+ unsigned int peer_cpu;
+
+ /*
+ * As soon as we come across an uninitialized runqueue, use it.
+ * In fact, either:
+ * - we are initializing the first cpu, and we assign it to
+ * runqueue 0. This is handy, especially if we are dealing
+ * with the boot cpu (if credit2 is the default scheduler),
+ * as we would not be able to use cpu_to_socket() and similar
+ * helpers anyway (their results are not reliable yet);
+ * - we have gone through all the active runqueues, and have not
+ * found anyone whose cpus' topology matches the one we are
+ * dealing with, so activating a new runqueue is what we want.
+ */
+ if ( prv->rqd[rqi].id == -1 )
+ break;
+
+ rqd = prv->rqd + rqi;
+ BUG_ON(cpumask_empty(&rqd->active));
+
+ peer_cpu = cpumask_first(&rqd->active);
+ BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID ||
+ cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
+
+ if ( opt_runqueue == OPT_RUNQUEUE_ALL ||
+ (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
+ (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
+ (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu)) )
+ break;
+ }
+
+ /* We really expect to be able to assign each cpu to a runqueue. */
+ BUG_ON(rqi >= nr_cpu_ids);
+
+ return rqi;
+}
+
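The first-match walk above can be illustrated with a standalone toy (not part of the patch; the topology table and pick_runqueue() are invented, and claiming an empty slot is folded into the walk here, whereas the scheduler activates a new runqueue separately via activate_runqueue()). Under the 'socket' policy, each cpu joins the first runqueue whose representative cpu shares its socket, and otherwise takes the first unused runqueue:

    #include <stdio.h>

    #define NR_CPUS 8

    /* Toy topology: two sockets, four cpus each. */
    static const int socket_of[NR_CPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };

    static int rq_peer[NR_CPUS]; /* representative cpu of each runqueue, -1 = unused */

    static int pick_runqueue(int cpu)
    {
        int rqi;

        for ( rqi = 0; rqi < NR_CPUS; rqi++ )
        {
            /* Unused runqueue: claim it (mirrors the id == -1 check above). */
            if ( rq_peer[rqi] == -1 )
            {
                rq_peer[rqi] = cpu;
                return rqi;
            }
            /* 'socket' policy: join the runqueue of a cpu in the same socket. */
            if ( socket_of[rq_peer[rqi]] == socket_of[cpu] )
                return rqi;
        }
        return -1; /* cannot happen: there are never more runqueues than cpus */
    }

    int main(void)
    {
        int cpu;

        for ( cpu = 0; cpu < NR_CPUS; cpu++ )
            rq_peer[cpu] = -1;

        /* Prints cpus 0-3 -> runqueue 0 and cpus 4-7 -> runqueue 1. */
        for ( cpu = 0; cpu < NR_CPUS; cpu++ )
            printf("cpu %d -> runqueue %d\n", cpu, pick_runqueue(cpu));

        return 0;
    }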
+/* Find the domain with the highest weight. */
+static void update_max_weight(struct csched2_runqueue_data *rqd, int new_weight,
+ int old_weight)
+{
+ /* Try to avoid brute-force search:
+ * - If new_weight is larger, max_weight <- new_weight
+ * - If old_weight != max_weight, someone else is still max_weight
+ * (No action required)
+ * - If old_weight == max_weight, brute-force search for max weight
+ */
+ if ( new_weight > rqd->max_weight )
+ {
+ rqd->max_weight = new_weight;
+ SCHED_STAT_CRANK(upd_max_weight_quick);
+ }
+ else if ( old_weight == rqd->max_weight )
+ {
+ struct list_head *iter;
+ int max_weight = 1;
+
+ list_for_each( iter, &rqd->svc )
+ {
+ struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, rqd_elem);
+
+ if ( svc->weight > max_weight )
+ max_weight = svc->weight;
+ }
+
+ rqd->max_weight = max_weight;
+ SCHED_STAT_CRANK(upd_max_weight_full);
+ }
+
+ if ( unlikely(tb_init_done) )
+ {
+ struct {
+ unsigned rqi:16, max_weight:16;
+ } d;
+ d.rqi = rqd->id;
+ d.max_weight = rqd->max_weight;
+ __trace_var(TRC_CSCHED2_RUNQ_MAX_WEIGHT, 1,
+ sizeof(d),
+ (unsigned char *)&d);
+ }
+}
+
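The shortcut logic above is easy to see in isolation (standalone sketch, not part of the patch; weights[], weight_changed() and the numbers are invented): raising a weight above the cached maximum is a constant-time update, lowering a weight that was not the maximum needs no work at all, and only changing the entry that held the maximum forces the linear rescan:

    #include <stdio.h>

    static int weights[] = { 256, 256, 512, 128 };
    static int max_weight = 512;           /* cached maximum of weights[] */

    static void weight_changed(int idx, int new_weight)
    {
        int old_weight = weights[idx];

        weights[idx] = new_weight;

        if ( new_weight > max_weight )
            max_weight = new_weight;       /* quick path */
        else if ( old_weight == max_weight )
        {
            /* The old maximum may be gone: rescan (slow path). */
            unsigned int i;

            max_weight = 1;
            for ( i = 0; i < sizeof(weights) / sizeof(weights[0]); i++ )
                if ( weights[i] > max_weight )
                    max_weight = weights[i];
        }
        /* else: some other entry still holds the maximum, nothing to do. */
    }

    int main(void)
    {
        weight_changed(3, 300);  /* neither case: max stays 512, no rescan */
        printf("max = %d\n", max_weight);
        weight_changed(2, 100);  /* old value was the max: rescan, max becomes 300 */
        printf("max = %d\n", max_weight);
        return 0;
    }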
+/* Add and remove from runqueue assignment (not active run queue) */
+static void
+_runq_assign(struct csched2_vcpu *svc, struct csched2_runqueue_data *rqd)
+{
+
+ svc->rqd = rqd;
+ list_add_tail(&svc->rqd_elem, &svc->rqd->svc);
+
+ update_max_weight(svc->rqd, svc->weight, 0);
+
+ /* Expected new load based on adding this vcpu */
+ rqd->b_avgload += svc->avgload;
+
+ if ( unlikely(tb_init_done) )
+ {
+ struct {
+ unsigned vcpu:16, dom:16;
+ unsigned rqi:16;
+ } d;
+ d.dom = svc->vcpu->domain->domain_id;
+ d.vcpu = svc->vcpu->vcpu_id;
+ d.rqi = rqd->id;
+ __trace_var(TRC_CSCHED2_RUNQ_ASSIGN, 1,
+ sizeof(d),
+ (unsigned char *)&d);
+ }
+
}
-/*
- * Time-to-credit, credit-to-time.
- *
- * We keep track of the "residual" time to make sure that frequent short
- * schedules still get accounted for in the end.
- *
- * FIXME: Do pre-calculated division?
- */
-static void t2c_update(struct csched2_runqueue_data *rqd, s_time_t time,
- struct csched2_vcpu *svc)
+static void
+runq_assign(const struct scheduler *ops, struct vcpu *vc)
{
- uint64_t val = time * rqd->max_weight + svc->residual;
+ struct csched2_vcpu *svc = vc->sched_priv;
- svc->residual = do_div(val, svc->weight);
- svc->credit -= val;
+ ASSERT(svc->rqd == NULL);
+
+ _runq_assign(svc, c2rqd(ops, vc->processor));
}
-static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct csched2_vcpu *svc)
+static void
+_runq_deassign(struct csched2_vcpu *svc)
{
- return credit * svc->weight / rqd->max_weight;
-}
+ struct csched2_runqueue_data *rqd = svc->rqd;
-/*
- * Runqueue related code
- */
+ ASSERT(!vcpu_on_runq(svc));
+ ASSERT(!(svc->flags & CSFLAG_scheduled));
-static inline int vcpu_on_runq(struct csched2_vcpu *svc)
-{
- return !list_empty(&svc->runq_elem);
+ list_del_init(&svc->rqd_elem);
+ update_max_weight(rqd, 0, svc->weight);
+
+ /* Expected new load based on removing this vcpu */
+ rqd->b_avgload = max_t(s_time_t, rqd->b_avgload - svc->avgload, 0);
+
+ svc->rqd = NULL;
}
-static struct csched2_vcpu * runq_elem(struct list_head *elem)
+static void
+runq_deassign(const struct scheduler *ops, struct vcpu *vc)
{
- return list_entry(elem, struct csched2_vcpu, runq_elem);
+ struct csched2_vcpu *svc = vc->sched_priv;
+
+ ASSERT(svc->rqd == c2rqd(ops, vc->processor));
+
+ _runq_deassign(svc);
}
/*
@@ -1219,51 +1421,6 @@ void burn_credits(struct csched2_runqueue_data *rqd,
}
}
-/* Find the domain with the highest weight. */
-static void update_max_weight(struct csched2_runqueue_data *rqd, int new_weight,
- int old_weight)
-{
- /* Try to avoid brute-force search:
- * - If new_weight is larger, max_weigth <- new_weight
- * - If old_weight != max_weight, someone else is still max_weight
- * (No action required)
- * - If old_weight == max_weight, brute-force search for max weight
- */
- if ( new_weight > rqd->max_weight )
- {
- rqd->max_weight = new_weight;
- SCHED_STAT_CRANK(upd_max_weight_quick);
- }
- else if ( old_weight == rqd->max_weight )
- {
- struct list_head *iter;
- int max_weight = 1;
-
- list_for_each( iter, &rqd->svc )
- {
- struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, rqd_elem);
-
- if ( svc->weight > max_weight )
- max_weight = svc->weight;
- }
-
- rqd->max_weight = max_weight;
- SCHED_STAT_CRANK(upd_max_weight_full);
- }
-
- if ( unlikely(tb_init_done) )
- {
- struct {
- unsigned rqi:16, max_weight:16;
- } d;
- d.rqi = rqd->id;
- d.max_weight = rqd->max_weight;
- __trace_var(TRC_CSCHED2_RUNQ_MAX_WEIGHT, 1,
- sizeof(d),
- (unsigned char *)&d);
- }
-}
-
#ifndef NDEBUG
static inline void
csched2_vcpu_check(struct vcpu *vc)
@@ -1328,72 +1485,6 @@ csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
return svc;
}
-/* Add and remove from runqueue assignment (not active run queue) */
-static void
-_runq_assign(struct csched2_vcpu *svc, struct csched2_runqueue_data *rqd)
-{
-
- svc->rqd = rqd;
- list_add_tail(&svc->rqd_elem, &svc->rqd->svc);
-
- update_max_weight(svc->rqd, svc->weight, 0);
-
- /* Expected new load based on adding this vcpu */
- rqd->b_avgload += svc->avgload;
-
- if ( unlikely(tb_init_done) )
- {
- struct {
- unsigned vcpu:16, dom:16;
- unsigned rqi:16;
- } d;
- d.dom = svc->vcpu->domain->domain_id;
- d.vcpu = svc->vcpu->vcpu_id;
- d.rqi=rqd->id;
- __trace_var(TRC_CSCHED2_RUNQ_ASSIGN, 1,
- sizeof(d),
- (unsigned char *)&d);
- }
-
-}
-
-static void
-runq_assign(const struct scheduler *ops, struct vcpu *vc)
-{
- struct csched2_vcpu *svc = vc->sched_priv;
-
- ASSERT(svc->rqd == NULL);
-
- _runq_assign(svc, c2rqd(ops, vc->processor));
-}
-
-static void
-_runq_deassign(struct csched2_vcpu *svc)
-{
- struct csched2_runqueue_data *rqd = svc->rqd;
-
- ASSERT(!vcpu_on_runq(svc));
- ASSERT(!(svc->flags & CSFLAG_scheduled));
-
- list_del_init(&svc->rqd_elem);
- update_max_weight(rqd, 0, svc->weight);
-
- /* Expected new load based on removing this vcpu */
- rqd->b_avgload = max_t(s_time_t, rqd->b_avgload - svc->avgload, 0);
-
- svc->rqd = NULL;
-}
-
-static void
-runq_deassign(const struct scheduler *ops, struct vcpu *vc)
-{
- struct csched2_vcpu *svc = vc->sched_priv;
-
- ASSERT(svc->rqd == c2rqd(ops, vc->processor));
-
- _runq_deassign(svc);
-}
-
static void
csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
{
@@ -2778,97 +2869,6 @@ csched2_dump(const struct scheduler *ops)
#undef cpustr
}
-static void activate_runqueue(struct csched2_private *prv, int rqi)
-{
- struct csched2_runqueue_data *rqd;
-
- rqd = prv->rqd + rqi;
-
- BUG_ON(!cpumask_empty(&rqd->active));
-
- rqd->max_weight = 1;
- rqd->id = rqi;
- INIT_LIST_HEAD(&rqd->svc);
- INIT_LIST_HEAD(&rqd->runq);
- spin_lock_init(&rqd->lock);
-
- __cpumask_set_cpu(rqi, &prv->active_queues);
-}
-
-static void deactivate_runqueue(struct csched2_private *prv, int rqi)
-{
- struct csched2_runqueue_data *rqd;
-
- rqd = prv->rqd + rqi;
-
- BUG_ON(!cpumask_empty(&rqd->active));
-
- rqd->id = -1;
-
- __cpumask_clear_cpu(rqi, &prv->active_queues);
-}
-
-static inline bool_t same_node(unsigned int cpua, unsigned int cpub)
-{
- return cpu_to_node(cpua) == cpu_to_node(cpub);
-}
-
-static inline bool_t same_socket(unsigned int cpua, unsigned int cpub)
-{
- return cpu_to_socket(cpua) == cpu_to_socket(cpub);
-}
-
-static inline bool_t same_core(unsigned int cpua, unsigned int cpub)
-{
- return same_socket(cpua, cpub) &&
- cpu_to_core(cpua) == cpu_to_core(cpub);
-}
-
-static unsigned int
-cpu_to_runqueue(struct csched2_private *prv, unsigned int cpu)
-{
- struct csched2_runqueue_data *rqd;
- unsigned int rqi;
-
- for ( rqi = 0; rqi < nr_cpu_ids; rqi++ )
- {
- unsigned int peer_cpu;
-
- /*
- * As soon as we come across an uninitialized runqueue, use it.
- * In fact, either:
- * - we are initializing the first cpu, and we assign it to
- * runqueue 0. This is handy, especially if we are dealing
- * with the boot cpu (if credit2 is the default scheduler),
- * as we would not be able to use cpu_to_socket() and similar
- * helpers anyway (they're result of which is not reliable yet);
- * - we have gone through all the active runqueues, and have not
- * found anyone whose cpus' topology matches the one we are
- * dealing with, so activating a new runqueue is what we want.
- */
- if ( prv->rqd[rqi].id == -1 )
- break;
-
- rqd = prv->rqd + rqi;
- BUG_ON(cpumask_empty(&rqd->active));
-
- peer_cpu = cpumask_first(&rqd->active);
- BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID ||
- cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
-
- if ( opt_runqueue == OPT_RUNQUEUE_ALL ||
- (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
- (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
- (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu)) )
- break;
- }
-
- /* We really expect to be able to assign each cpu to a runqueue. */
- BUG_ON(rqi >= nr_cpu_ids);
-
- return rqi;
-}
-
/* Returns the ID of the runqueue the cpu is assigned to. */
static unsigned
init_pdata(struct csched2_private *prv, unsigned int cpu)
So that they're all close among each other, and also near to the comment describind the runqueue organization (which is also moved). No functional change intended. Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com> --- Cc: George Dunlap <george.dunlap@eu.citrix.com> Cc: Anshul Makkar <anshul.makkar@citrix.com> --- xen/common/sched_credit2.c | 572 ++++++++++++++++++++++---------------------- 1 file changed, 286 insertions(+), 286 deletions(-)