@@ -295,63 +295,6 @@ static int __read_mostly opt_overload_balance_tolerance = -3;
integer_param("credit2_balance_over", opt_overload_balance_tolerance);
/*
- * Runqueue organization.
- *
- * The various cpus are to be assigned each one to a runqueue, and we
- * want that to happen basing on topology. At the moment, it is possible
- * to choose to arrange runqueues to be:
- *
- * - per-core: meaning that there will be one runqueue per each physical
- * core of the host. This will happen if the opt_runqueue
- * parameter is set to 'core';
- *
- * - per-socket: meaning that there will be one runqueue per each physical
- * socket (AKA package, which often, but not always, also
- * matches a NUMA node) of the host; This will happen if
- * the opt_runqueue parameter is set to 'socket';
- *
- * - per-node: meaning that there will be one runqueue per each physical
- * NUMA node of the host. This will happen if the opt_runqueue
- * parameter is set to 'node';
- *
- * - global: meaning that there will be only one runqueue to which all the
- * (logical) processors of the host belong. This will happen if
- * the opt_runqueue parameter is set to 'all'.
- *
- * Depending on the value of opt_runqueue, therefore, cpus that are part of
- * either the same physical core, the same physical socket, the same NUMA
- * node, or just all of them, will be put together to form runqueues.
- */
-#define OPT_RUNQUEUE_CORE 0
-#define OPT_RUNQUEUE_SOCKET 1
-#define OPT_RUNQUEUE_NODE 2
-#define OPT_RUNQUEUE_ALL 3
-static const char *const opt_runqueue_str[] = {
- [OPT_RUNQUEUE_CORE] = "core",
- [OPT_RUNQUEUE_SOCKET] = "socket",
- [OPT_RUNQUEUE_NODE] = "node",
- [OPT_RUNQUEUE_ALL] = "all"
-};
-static int __read_mostly opt_runqueue = OPT_RUNQUEUE_SOCKET;
-
-static void parse_credit2_runqueue(const char *s)
-{
- unsigned int i;
-
- for ( i = 0; i < ARRAY_SIZE(opt_runqueue_str); i++ )
- {
- if ( !strcmp(s, opt_runqueue_str[i]) )
- {
- opt_runqueue = i;
- return;
- }
- }
-
- printk("WARNING, unrecognized value of credit2_runqueue option!\n");
-}
-custom_param("credit2_runqueue", parse_credit2_runqueue);
-
-/*
* Per-runqueue data
*/
struct csched2_runqueue_data {
@@ -563,45 +506,304 @@ static int get_fallback_cpu(struct csched2_vcpu *svc)
return cpumask_first(cpumask_scratch_cpu(cpu));
}
- ASSERT(!cpumask_empty(cpumask_scratch_cpu(cpu)));
-
- return cpumask_first(cpumask_scratch_cpu(cpu));
+ ASSERT(!cpumask_empty(cpumask_scratch_cpu(cpu)));
+
+ return cpumask_first(cpumask_scratch_cpu(cpu));
+}
+
+/*
+ * Time-to-credit, credit-to-time.
+ *
+ * We keep track of the "residual" time to make sure that frequent short
+ * schedules still get accounted for in the end.
+ *
+ * FIXME: Do pre-calculated division?
+ */
+static void t2c_update(struct csched2_runqueue_data *rqd, s_time_t time,
+ struct csched2_vcpu *svc)
+{
+ uint64_t val = time * rqd->max_weight + svc->residual;
+
+ svc->residual = do_div(val, svc->weight);
+ svc->credit -= val;
+}
+
+static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct csched2_vcpu *svc)
+{
+ return credit * svc->weight / rqd->max_weight;
+}
+
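As an aside, the effect of the residual can be seen with a standalone toy program (not part of this patch; charge_time() and all the numbers below are made up for illustration). Truncating the division on every short schedule can silently drop up to weight-1 units each time, whereas carrying the remainder forward, as t2c_update() does via do_div(), charges the exact total over time:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t charged, residual;

    /* Same shape as t2c_update(): turn a slice of time into credit to burn. */
    static void charge_time(uint64_t time, uint64_t max_weight, uint64_t weight)
    {
        uint64_t val = time * max_weight + residual;

        charged += val / weight;   /* what would be subtracted from svc->credit */
        residual = val % weight;   /* carried over to the next short schedule */
    }

    int main(void)
    {
        uint64_t naive = 0;
        unsigned int i;

        /* 1000 short slices of 333 time units, weight 140, max_weight 512. */
        for ( i = 0; i < 1000; i++ )
        {
            charge_time(333, 512, 140);
            naive += (333 * 512) / 140;           /* truncating every time */
        }

        /* Prints: with residual: 1217828, without: 1217000, exact: 1217828 */
        printf("with residual: %llu, without: %llu, exact: %llu\n",
               (unsigned long long)charged, (unsigned long long)naive,
               333ULL * 512 * 1000 / 140);
        return 0;
    }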
+/*
+ * Runqueue related code.
+ */
+
+/*
+ * Runqueue organization.
+ *
+ * Each cpu is assigned to exactly one runqueue, and we want that
+ * assignment to be based on topology. At the moment, it is possible
+ * to choose to arrange runqueues to be:
+ *
+ * - per-core: meaning that there will be one runqueue per each physical
+ * core of the host. This will happen if the opt_runqueue
+ * parameter is set to 'core';
+ *
+ * - per-socket: meaning that there will be one runqueue per each physical
+ * socket (AKA package, which often, but not always, also
+ * matches a NUMA node) of the host. This will happen if
+ * the opt_runqueue parameter is set to 'socket';
+ *
+ * - per-node: meaning that there will be one runqueue per each physical
+ * NUMA node of the host. This will happen if the opt_runqueue
+ * parameter is set to 'node';
+ *
+ * - global: meaning that there will be only one runqueue to which all the
+ * (logical) processors of the host belong. This will happen if
+ * the opt_runqueue parameter is set to 'all'.
+ *
+ * Depending on the value of opt_runqueue, therefore, cpus that are part of
+ * either the same physical core, the same physical socket, the same NUMA
+ * node, or just all of them, will be put together to form runqueues.
+ */
+#define OPT_RUNQUEUE_CORE 0
+#define OPT_RUNQUEUE_SOCKET 1
+#define OPT_RUNQUEUE_NODE 2
+#define OPT_RUNQUEUE_ALL 3
+static const char *const opt_runqueue_str[] = {
+ [OPT_RUNQUEUE_CORE] = "core",
+ [OPT_RUNQUEUE_SOCKET] = "socket",
+ [OPT_RUNQUEUE_NODE] = "node",
+ [OPT_RUNQUEUE_ALL] = "all"
+};
+static int __read_mostly opt_runqueue = OPT_RUNQUEUE_SOCKET;
+
+static void parse_credit2_runqueue(const char *s)
+{
+ unsigned int i;
+
+ for ( i = 0; i < ARRAY_SIZE(opt_runqueue_str); i++ )
+ {
+ if ( !strcmp(s, opt_runqueue_str[i]) )
+ {
+ opt_runqueue = i;
+ return;
+ }
+ }
+
+ printk("WARNING, unrecognized value of credit2_runqueue option!\n");
+}
+custom_param("credit2_runqueue", parse_credit2_runqueue);
+
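For reference (a usage note, not part of the patch): the parameter registered just above is consumed from the hypervisor command line, and with the table as defined here it accepts exactly the four strings in opt_runqueue_str, e.g.

    credit2_runqueue=node

Anything else triggers the warning printk and leaves opt_runqueue at its previous value, i.e. at the per-socket default unless an earlier, valid occurrence of the option changed it.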
+static inline int vcpu_on_runq(struct csched2_vcpu *svc)
+{
+ return !list_empty(&svc->runq_elem);
+}
+
+static struct csched2_vcpu * runq_elem(struct list_head *elem)
+{
+ return list_entry(elem, struct csched2_vcpu, runq_elem);
+}
+
+static void activate_runqueue(struct csched2_private *prv, int rqi)
+{
+ struct csched2_runqueue_data *rqd;
+
+ rqd = prv->rqd + rqi;
+
+ BUG_ON(!cpumask_empty(&rqd->active));
+
+ rqd->max_weight = 1;
+ rqd->id = rqi;
+ INIT_LIST_HEAD(&rqd->svc);
+ INIT_LIST_HEAD(&rqd->runq);
+ spin_lock_init(&rqd->lock);
+
+ __cpumask_set_cpu(rqi, &prv->active_queues);
+}
+
+static void deactivate_runqueue(struct csched2_private *prv, int rqi)
+{
+ struct csched2_runqueue_data *rqd;
+
+ rqd = prv->rqd + rqi;
+
+ BUG_ON(!cpumask_empty(&rqd->active));
+
+ rqd->id = -1;
+
+ __cpumask_clear_cpu(rqi, &prv->active_queues);
+}
+
+static inline bool_t same_node(unsigned int cpua, unsigned int cpub)
+{
+ return cpu_to_node(cpua) == cpu_to_node(cpub);
+}
+
+static inline bool_t same_socket(unsigned int cpua, unsigned int cpub)
+{
+ return cpu_to_socket(cpua) == cpu_to_socket(cpub);
+}
+
+static inline bool_t same_core(unsigned int cpua, unsigned int cpub)
+{
+ return same_socket(cpua, cpub) &&
+ cpu_to_core(cpua) == cpu_to_core(cpub);
+}
+
+static unsigned int
+cpu_to_runqueue(struct csched2_private *prv, unsigned int cpu)
+{
+ struct csched2_runqueue_data *rqd;
+ unsigned int rqi;
+
+ for ( rqi = 0; rqi < nr_cpu_ids; rqi++ )
+ {
+ unsigned int peer_cpu;
+
+ /*
+ * As soon as we come across an uninitialized runqueue, use it.
+ * In fact, either:
+ * - we are initializing the first cpu, and we assign it to
+ * runqueue 0. This is handy, especially if we are dealing
+ * with the boot cpu (if credit2 is the default scheduler),
+ * as we would not be able to use cpu_to_socket() and similar
+ * helpers anyway (their results are not reliable yet);
+ * - we have gone through all the active runqueues, and have not
+ * found anyone whose cpus' topology matches the one we are
+ * dealing with, so activating a new runqueue is what we want.
+ */
+ if ( prv->rqd[rqi].id == -1 )
+ break;
+
+ rqd = prv->rqd + rqi;
+ BUG_ON(cpumask_empty(&rqd->active));
+
+ peer_cpu = cpumask_first(&rqd->active);
+ BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID ||
+ cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
+
+ if ( opt_runqueue == OPT_RUNQUEUE_ALL ||
+ (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
+ (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
+ (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu)) )
+ break;
+ }
+
+ /* We really expect to be able to assign each cpu to a runqueue. */
+ BUG_ON(rqi >= nr_cpu_ids);
+
+ return rqi;
+}
+
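The first-match walk above can be illustrated with a standalone toy (not part of the patch; the topology table and pick_runqueue() are invented, and claiming an empty slot is folded into the walk here, whereas the scheduler activates a new runqueue separately via activate_runqueue()). Under the 'socket' policy, each cpu joins the first runqueue whose representative cpu shares its socket, and otherwise takes the first unused runqueue:

    #include <stdio.h>

    #define NR_CPUS 8

    /* Toy topology: two sockets, four cpus each. */
    static const int socket_of[NR_CPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };

    static int rq_peer[NR_CPUS]; /* representative cpu of each runqueue, -1 = unused */

    static int pick_runqueue(int cpu)
    {
        int rqi;

        for ( rqi = 0; rqi < NR_CPUS; rqi++ )
        {
            /* Unused runqueue: claim it (mirrors the id == -1 check above). */
            if ( rq_peer[rqi] == -1 )
            {
                rq_peer[rqi] = cpu;
                return rqi;
            }
            /* 'socket' policy: join the runqueue of a cpu in the same socket. */
            if ( socket_of[rq_peer[rqi]] == socket_of[cpu] )
                return rqi;
        }
        return -1; /* cannot happen: there are never more runqueues than cpus */
    }

    int main(void)
    {
        int cpu;

        for ( cpu = 0; cpu < NR_CPUS; cpu++ )
            rq_peer[cpu] = -1;

        /* Prints cpus 0-3 -> runqueue 0 and cpus 4-7 -> runqueue 1. */
        for ( cpu = 0; cpu < NR_CPUS; cpu++ )
            printf("cpu %d -> runqueue %d\n", cpu, pick_runqueue(cpu));

        return 0;
    }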
+/* Find the domain with the highest weight. */
+static void update_max_weight(struct csched2_runqueue_data *rqd, int new_weight,
+ int old_weight)
+{
+ /* Try to avoid brute-force search:
+ * - If new_weight is larger, max_weight <- new_weight
+ * - If old_weight != max_weight, someone else is still max_weight
+ * (No action required)
+ * - If old_weight == max_weight, brute-force search for max weight
+ */
+ if ( new_weight > rqd->max_weight )
+ {
+ rqd->max_weight = new_weight;
+ SCHED_STAT_CRANK(upd_max_weight_quick);
+ }
+ else if ( old_weight == rqd->max_weight )
+ {
+ struct list_head *iter;
+ int max_weight = 1;
+
+ list_for_each( iter, &rqd->svc )
+ {
+ struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, rqd_elem);
+
+ if ( svc->weight > max_weight )
+ max_weight = svc->weight;
+ }
+
+ rqd->max_weight = max_weight;
+ SCHED_STAT_CRANK(upd_max_weight_full);
+ }
+
+ if ( unlikely(tb_init_done) )
+ {
+ struct {
+ unsigned rqi:16, max_weight:16;
+ } d;
+ d.rqi = rqd->id;
+ d.max_weight = rqd->max_weight;
+ __trace_var(TRC_CSCHED2_RUNQ_MAX_WEIGHT, 1,
+ sizeof(d),
+ (unsigned char *)&d);
+ }
+}
+
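The shortcut logic above is easy to see in isolation (standalone sketch, not part of the patch; weights[], weight_changed() and the numbers are invented): raising a weight above the cached maximum is a constant-time update, lowering a weight that was not the maximum needs no work at all, and only changing the entry that held the maximum forces the linear rescan:

    #include <stdio.h>

    static int weights[] = { 256, 256, 512, 128 };
    static int max_weight = 512;           /* cached maximum of weights[] */

    static void weight_changed(int idx, int new_weight)
    {
        int old_weight = weights[idx];

        weights[idx] = new_weight;

        if ( new_weight > max_weight )
            max_weight = new_weight;       /* quick path */
        else if ( old_weight == max_weight )
        {
            /* The old maximum may be gone: rescan (slow path). */
            unsigned int i;

            max_weight = 1;
            for ( i = 0; i < sizeof(weights) / sizeof(weights[0]); i++ )
                if ( weights[i] > max_weight )
                    max_weight = weights[i];
        }
        /* else: some other entry still holds the maximum, nothing to do. */
    }

    int main(void)
    {
        weight_changed(3, 300);  /* neither case: max stays 512, no rescan */
        printf("max = %d\n", max_weight);
        weight_changed(2, 100);  /* old value was the max: rescan, max becomes 300 */
        printf("max = %d\n", max_weight);
        return 0;
    }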
+/* Add and remove from runqueue assignment (not active run queue) */
+static void
+_runq_assign(struct csched2_vcpu *svc, struct csched2_runqueue_data *rqd)
+{
+
+ svc->rqd = rqd;
+ list_add_tail(&svc->rqd_elem, &svc->rqd->svc);
+
+ update_max_weight(svc->rqd, svc->weight, 0);
+
+ /* Expected new load based on adding this vcpu */
+ rqd->b_avgload += svc->avgload;
+
+ if ( unlikely(tb_init_done) )
+ {
+ struct {
+ unsigned vcpu:16, dom:16;
+ unsigned rqi:16;
+ } d;
+ d.dom = svc->vcpu->domain->domain_id;
+ d.vcpu = svc->vcpu->vcpu_id;
+ d.rqi = rqd->id;
+ __trace_var(TRC_CSCHED2_RUNQ_ASSIGN, 1,
+ sizeof(d),
+ (unsigned char *)&d);
+ }
+
}
-/*
- * Time-to-credit, credit-to-time.
- *
- * We keep track of the "residual" time to make sure that frequent short
- * schedules still get accounted for in the end.
- *
- * FIXME: Do pre-calculated division?
- */
-static void t2c_update(struct csched2_runqueue_data *rqd, s_time_t time,
- struct csched2_vcpu *svc)
+static void
+runq_assign(const struct scheduler *ops, struct vcpu *vc)
{
- uint64_t val = time * rqd->max_weight + svc->residual;
+ struct csched2_vcpu *svc = vc->sched_priv;
- svc->residual = do_div(val, svc->weight);
- svc->credit -= val;
+ ASSERT(svc->rqd == NULL);
+
+ _runq_assign(svc, c2rqd(ops, vc->processor));
}
-static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct csched2_vcpu *svc)
+static void
+_runq_deassign(struct csched2_vcpu *svc)
{
- return credit * svc->weight / rqd->max_weight;
-}
+ struct csched2_runqueue_data *rqd = svc->rqd;
-/*
- * Runqueue related code
- */
+ ASSERT(!vcpu_on_runq(svc));
+ ASSERT(!(svc->flags & CSFLAG_scheduled));
-static inline int vcpu_on_runq(struct csched2_vcpu *svc)
-{
- return !list_empty(&svc->runq_elem);
+ list_del_init(&svc->rqd_elem);
+ update_max_weight(rqd, 0, svc->weight);
+
+ /* Expected new load based on removing this vcpu */
+ rqd->b_avgload = max_t(s_time_t, rqd->b_avgload - svc->avgload, 0);
+
+ svc->rqd = NULL;
}
-static struct csched2_vcpu * runq_elem(struct list_head *elem)
+static void
+runq_deassign(const struct scheduler *ops, struct vcpu *vc)
{
- return list_entry(elem, struct csched2_vcpu, runq_elem);
+ struct csched2_vcpu *svc = vc->sched_priv;
+
+ ASSERT(svc->rqd == c2rqd(ops, vc->processor));
+
+ _runq_deassign(svc);
}
/*
@@ -1219,51 +1421,6 @@ void burn_credits(struct csched2_runqueue_data *rqd,
}
}
-/* Find the domain with the highest weight. */
-static void update_max_weight(struct csched2_runqueue_data *rqd, int new_weight,
- int old_weight)
-{
- /* Try to avoid brute-force search:
- * - If new_weight is larger, max_weigth <- new_weight
- * - If old_weight != max_weight, someone else is still max_weight
- * (No action required)
- * - If old_weight == max_weight, brute-force search for max weight
- */
- if ( new_weight > rqd->max_weight )
- {
- rqd->max_weight = new_weight;
- SCHED_STAT_CRANK(upd_max_weight_quick);
- }
- else if ( old_weight == rqd->max_weight )
- {
- struct list_head *iter;
- int max_weight = 1;
-
- list_for_each( iter, &rqd->svc )
- {
- struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, rqd_elem);
-
- if ( svc->weight > max_weight )
- max_weight = svc->weight;
- }
-
- rqd->max_weight = max_weight;
- SCHED_STAT_CRANK(upd_max_weight_full);
- }
-
- if ( unlikely(tb_init_done) )
- {
- struct {
- unsigned rqi:16, max_weight:16;
- } d;
- d.rqi = rqd->id;
- d.max_weight = rqd->max_weight;
- __trace_var(TRC_CSCHED2_RUNQ_MAX_WEIGHT, 1,
- sizeof(d),
- (unsigned char *)&d);
- }
-}
-
#ifndef NDEBUG
static inline void
csched2_vcpu_check(struct vcpu *vc)
@@ -1328,72 +1485,6 @@ csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
return svc;
}
-/* Add and remove from runqueue assignment (not active run queue) */
-static void
-_runq_assign(struct csched2_vcpu *svc, struct csched2_runqueue_data *rqd)
-{
-
- svc->rqd = rqd;
- list_add_tail(&svc->rqd_elem, &svc->rqd->svc);
-
- update_max_weight(svc->rqd, svc->weight, 0);
-
- /* Expected new load based on adding this vcpu */
- rqd->b_avgload += svc->avgload;
-
- if ( unlikely(tb_init_done) )
- {
- struct {
- unsigned vcpu:16, dom:16;
- unsigned rqi:16;
- } d;
- d.dom = svc->vcpu->domain->domain_id;
- d.vcpu = svc->vcpu->vcpu_id;
- d.rqi=rqd->id;
- __trace_var(TRC_CSCHED2_RUNQ_ASSIGN, 1,
- sizeof(d),
- (unsigned char *)&d);
- }
-
-}
-
-static void
-runq_assign(const struct scheduler *ops, struct vcpu *vc)
-{
- struct csched2_vcpu *svc = vc->sched_priv;
-
- ASSERT(svc->rqd == NULL);
-
- _runq_assign(svc, c2rqd(ops, vc->processor));
-}
-
-static void
-_runq_deassign(struct csched2_vcpu *svc)
-{
- struct csched2_runqueue_data *rqd = svc->rqd;
-
- ASSERT(!vcpu_on_runq(svc));
- ASSERT(!(svc->flags & CSFLAG_scheduled));
-
- list_del_init(&svc->rqd_elem);
- update_max_weight(rqd, 0, svc->weight);
-
- /* Expected new load based on removing this vcpu */
- rqd->b_avgload = max_t(s_time_t, rqd->b_avgload - svc->avgload, 0);
-
- svc->rqd = NULL;
-}
-
-static void
-runq_deassign(const struct scheduler *ops, struct vcpu *vc)
-{
- struct csched2_vcpu *svc = vc->sched_priv;
-
- ASSERT(svc->rqd == c2rqd(ops, vc->processor));
-
- _runq_deassign(svc);
-}
-
static void
csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
{
@@ -2778,97 +2869,6 @@ csched2_dump(const struct scheduler *ops)
#undef cpustr
}
-static void activate_runqueue(struct csched2_private *prv, int rqi)
-{
- struct csched2_runqueue_data *rqd;
-
- rqd = prv->rqd + rqi;
-
- BUG_ON(!cpumask_empty(&rqd->active));
-
- rqd->max_weight = 1;
- rqd->id = rqi;
- INIT_LIST_HEAD(&rqd->svc);
- INIT_LIST_HEAD(&rqd->runq);
- spin_lock_init(&rqd->lock);
-
- __cpumask_set_cpu(rqi, &prv->active_queues);
-}
-
-static void deactivate_runqueue(struct csched2_private *prv, int rqi)
-{
- struct csched2_runqueue_data *rqd;
-
- rqd = prv->rqd + rqi;
-
- BUG_ON(!cpumask_empty(&rqd->active));
-
- rqd->id = -1;
-
- __cpumask_clear_cpu(rqi, &prv->active_queues);
-}
-
-static inline bool_t same_node(unsigned int cpua, unsigned int cpub)
-{
- return cpu_to_node(cpua) == cpu_to_node(cpub);
-}
-
-static inline bool_t same_socket(unsigned int cpua, unsigned int cpub)
-{
- return cpu_to_socket(cpua) == cpu_to_socket(cpub);
-}
-
-static inline bool_t same_core(unsigned int cpua, unsigned int cpub)
-{
- return same_socket(cpua, cpub) &&
- cpu_to_core(cpua) == cpu_to_core(cpub);
-}
-
-static unsigned int
-cpu_to_runqueue(struct csched2_private *prv, unsigned int cpu)
-{
- struct csched2_runqueue_data *rqd;
- unsigned int rqi;
-
- for ( rqi = 0; rqi < nr_cpu_ids; rqi++ )
- {
- unsigned int peer_cpu;
-
- /*
- * As soon as we come across an uninitialized runqueue, use it.
- * In fact, either:
- * - we are initializing the first cpu, and we assign it to
- * runqueue 0. This is handy, especially if we are dealing
- * with the boot cpu (if credit2 is the default scheduler),
- * as we would not be able to use cpu_to_socket() and similar
- * helpers anyway (they're result of which is not reliable yet);
- * - we have gone through all the active runqueues, and have not
- * found anyone whose cpus' topology matches the one we are
- * dealing with, so activating a new runqueue is what we want.
- */
- if ( prv->rqd[rqi].id == -1 )
- break;
-
- rqd = prv->rqd + rqi;
- BUG_ON(cpumask_empty(&rqd->active));
-
- peer_cpu = cpumask_first(&rqd->active);
- BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID ||
- cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
-
- if ( opt_runqueue == OPT_RUNQUEUE_ALL ||
- (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
- (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
- (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu)) )
- break;
- }
-
- /* We really expect to be able to assign each cpu to a runqueue. */
- BUG_ON(rqi >= nr_cpu_ids);
-
- return rqi;
-}
-
/* Returns the ID of the runqueue the cpu is assigned to. */
static unsigned
init_pdata(struct csched2_private *prv, unsigned int cpu)
So that they're all close among each other, and also near to the comment describind the runqueue organization (which is also moved). No functional change intended. Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com> --- Cc: George Dunlap <george.dunlap@eu.citrix.com> Cc: Anshul Makkar <anshul.makkar@citrix.com> --- xen/common/sched_credit2.c | 572 ++++++++++++++++++++++---------------------- 1 file changed, 286 insertions(+), 286 deletions(-)