
[v2,06/10] xen: credit2: group the runq manipulating functions.

Message ID 148664874043.595.13837823575489312498.stgit@Solace.fritz.box (mailing list archive)
State New, archived

Commit Message

Dario Faggioli Feb. 9, 2017, 1:59 p.m. UTC
So that they're all close to each other, and
also near the comment describing the runqueue
organization (which is also moved).

No functional change intended.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Anshul Makkar <anshul.makkar@citrix.com>
---
 xen/common/sched_credit2.c |  572 ++++++++++++++++++++++----------------------
 1 file changed, 286 insertions(+), 286 deletions(-)

Comments

George Dunlap Feb. 15, 2017, 2:42 p.m. UTC | #1
On Thu, Feb 9, 2017 at 1:59 PM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> So that they're all close to each other, and
> also near the comment describing the runqueue
> organization (which is also moved).
>
> No functional change intended.
>
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> ---
> Cc: George Dunlap <george.dunlap@eu.citrix.com>
> Cc: Anshul Makkar <anshul.makkar@citrix.com>
> ---
>  xen/common/sched_credit2.c |  572 ++++++++++++++++++++++----------------------
>  1 file changed, 286 insertions(+), 286 deletions(-)
>
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 4b4f4f8..addee7b 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -295,63 +295,6 @@ static int __read_mostly opt_overload_balance_tolerance = -3;
>  integer_param("credit2_balance_over", opt_overload_balance_tolerance);
>
>  /*
> - * Runqueue organization.
> - *
> - * The various cpus are to be assigned each one to a runqueue, and we
> - * want that to happen basing on topology. At the moment, it is possible
> - * to choose to arrange runqueues to be:
> - *
> - * - per-core: meaning that there will be one runqueue per each physical
> - *             core of the host. This will happen if the opt_runqueue
> - *             parameter is set to 'core';
> - *
> - * - per-socket: meaning that there will be one runqueue per each physical
> - *               socket (AKA package, which often, but not always, also
> - *               matches a NUMA node) of the host; This will happen if
> - *               the opt_runqueue parameter is set to 'socket';
> - *
> - * - per-node: meaning that there will be one runqueue per each physical
> - *             NUMA node of the host. This will happen if the opt_runqueue
> - *             parameter is set to 'node';
> - *
> - * - global: meaning that there will be only one runqueue to which all the
> - *           (logical) processors of the host belong. This will happen if
> - *           the opt_runqueue parameter is set to 'all'.
> - *
> - * Depending on the value of opt_runqueue, therefore, cpus that are part of
> - * either the same physical core, the same physical socket, the same NUMA
> - * node, or just all of them, will be put together to form runqueues.
> - */
> -#define OPT_RUNQUEUE_CORE   0
> -#define OPT_RUNQUEUE_SOCKET 1
> -#define OPT_RUNQUEUE_NODE   2
> -#define OPT_RUNQUEUE_ALL    3
> -static const char *const opt_runqueue_str[] = {
> -    [OPT_RUNQUEUE_CORE] = "core",
> -    [OPT_RUNQUEUE_SOCKET] = "socket",
> -    [OPT_RUNQUEUE_NODE] = "node",
> -    [OPT_RUNQUEUE_ALL] = "all"
> -};
> -static int __read_mostly opt_runqueue = OPT_RUNQUEUE_SOCKET;
> -
> -static void parse_credit2_runqueue(const char *s)
> -{
> -    unsigned int i;
> -
> -    for ( i = 0; i < ARRAY_SIZE(opt_runqueue_str); i++ )
> -    {
> -        if ( !strcmp(s, opt_runqueue_str[i]) )
> -        {
> -            opt_runqueue = i;
> -            return;
> -        }
> -    }
> -
> -    printk("WARNING, unrecognized value of credit2_runqueue option!\n");
> -}
> -custom_param("credit2_runqueue", parse_credit2_runqueue);

Most of the motion makes sense, but moving the option parser along too
seems a bit strange.  Wouldn't it make more sense to leave it with the
other option parsers?  (Perhaps with a "pointer" comment if you want
to move the main bulk of the comment?)

Everything else looks good.

 -George
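
[Editorial illustration, not code from this series: a minimal sketch of the
"pointer" comment arrangement suggested above, assuming opt_runqueue and
opt_runqueue_str stay next to the parser as in the pre-patch layout. The
parser remains with the other boot-option parsers near the top of the file,
and only a short comment points at the relocated description.]

    /*
     * Boot parameter: credit2_runqueue=core|socket|node|all.
     * See the "Runqueue organization" comment, down in the runqueue
     * handling section, for what each value means.
     */
    static void parse_credit2_runqueue(const char *s)
    {
        unsigned int i;

        for ( i = 0; i < ARRAY_SIZE(opt_runqueue_str); i++ )
        {
            if ( !strcmp(s, opt_runqueue_str[i]) )
            {
                opt_runqueue = i;
                return;
            }
        }

        printk("WARNING, unrecognized value of credit2_runqueue option!\n");
    }
    custom_param("credit2_runqueue", parse_credit2_runqueue);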

> -
> -/*
>   * Per-runqueue data
>   */
>  struct csched2_runqueue_data {
> @@ -563,45 +506,304 @@ static int get_fallback_cpu(struct csched2_vcpu *svc)
>          return cpumask_first(cpumask_scratch_cpu(cpu));
>      }
>
> -    ASSERT(!cpumask_empty(cpumask_scratch_cpu(cpu)));
> -
> -    return cpumask_first(cpumask_scratch_cpu(cpu));
> +    ASSERT(!cpumask_empty(cpumask_scratch_cpu(cpu)));
> +
> +    return cpumask_first(cpumask_scratch_cpu(cpu));
> +}
> +
> +/*
> + * Time-to-credit, credit-to-time.
> + *
> + * We keep track of the "residual" time to make sure that frequent short
> + * schedules still get accounted for in the end.
> + *
> + * FIXME: Do pre-calculated division?
> + */
> +static void t2c_update(struct csched2_runqueue_data *rqd, s_time_t time,
> +                          struct csched2_vcpu *svc)
> +{
> +    uint64_t val = time * rqd->max_weight + svc->residual;
> +
> +    svc->residual = do_div(val, svc->weight);
> +    svc->credit -= val;
> +}
> +
> +static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct csched2_vcpu *svc)
> +{
> +    return credit * svc->weight / rqd->max_weight;
> +}
> +
> +/*
> + * Runqueue related code.
> + */
> +
> +/*
> + * Runqueue organization.
> + *
> + * The various cpus are to be assigned each one to a runqueue, and we
> + * want that to happen basing on topology. At the moment, it is possible
> + * to choose to arrange runqueues to be:
> + *
> + * - per-core: meaning that there will be one runqueue per each physical
> + *             core of the host. This will happen if the opt_runqueue
> + *             parameter is set to 'core';
> + *
> + * - per-socket: meaning that there will be one runqueue per each physical
> + *               socket (AKA package, which often, but not always, also
> + *               matches a NUMA node) of the host; This will happen if
> + *               the opt_runqueue parameter is set to 'socket';
> + *
> + * - per-node: meaning that there will be one runqueue per each physical
> + *             NUMA node of the host. This will happen if the opt_runqueue
> + *             parameter is set to 'node';
> + *
> + * - global: meaning that there will be only one runqueue to which all the
> + *           (logical) processors of the host belong. This will happen if
> + *           the opt_runqueue parameter is set to 'all'.
> + *
> + * Depending on the value of opt_runqueue, therefore, cpus that are part of
> + * either the same physical core, the same physical socket, the same NUMA
> + * node, or just all of them, will be put together to form runqueues.
> + */
> +#define OPT_RUNQUEUE_CORE   0
> +#define OPT_RUNQUEUE_SOCKET 1
> +#define OPT_RUNQUEUE_NODE   2
> +#define OPT_RUNQUEUE_ALL    3
> +static const char *const opt_runqueue_str[] = {
> +    [OPT_RUNQUEUE_CORE] = "core",
> +    [OPT_RUNQUEUE_SOCKET] = "socket",
> +    [OPT_RUNQUEUE_NODE] = "node",
> +    [OPT_RUNQUEUE_ALL] = "all"
> +};
> +static int __read_mostly opt_runqueue = OPT_RUNQUEUE_SOCKET;
> +
> +static void parse_credit2_runqueue(const char *s)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < ARRAY_SIZE(opt_runqueue_str); i++ )
> +    {
> +        if ( !strcmp(s, opt_runqueue_str[i]) )
> +        {
> +            opt_runqueue = i;
> +            return;
> +        }
> +    }
> +
> +    printk("WARNING, unrecognized value of credit2_runqueue option!\n");
> +}
> +custom_param("credit2_runqueue", parse_credit2_runqueue);
> +
> +static inline int vcpu_on_runq(struct csched2_vcpu *svc)
> +{
> +    return !list_empty(&svc->runq_elem);
> +}
> +
> +static struct csched2_vcpu * runq_elem(struct list_head *elem)
> +{
> +    return list_entry(elem, struct csched2_vcpu, runq_elem);
> +}
> +
> +static void activate_runqueue(struct csched2_private *prv, int rqi)
> +{
> +    struct csched2_runqueue_data *rqd;
> +
> +    rqd = prv->rqd + rqi;
> +
> +    BUG_ON(!cpumask_empty(&rqd->active));
> +
> +    rqd->max_weight = 1;
> +    rqd->id = rqi;
> +    INIT_LIST_HEAD(&rqd->svc);
> +    INIT_LIST_HEAD(&rqd->runq);
> +    spin_lock_init(&rqd->lock);
> +
> +    __cpumask_set_cpu(rqi, &prv->active_queues);
> +}
> +
> +static void deactivate_runqueue(struct csched2_private *prv, int rqi)
> +{
> +    struct csched2_runqueue_data *rqd;
> +
> +    rqd = prv->rqd + rqi;
> +
> +    BUG_ON(!cpumask_empty(&rqd->active));
> +
> +    rqd->id = -1;
> +
> +    __cpumask_clear_cpu(rqi, &prv->active_queues);
> +}
> +
> +static inline bool_t same_node(unsigned int cpua, unsigned int cpub)
> +{
> +    return cpu_to_node(cpua) == cpu_to_node(cpub);
> +}
> +
> +static inline bool_t same_socket(unsigned int cpua, unsigned int cpub)
> +{
> +    return cpu_to_socket(cpua) == cpu_to_socket(cpub);
> +}
> +
> +static inline bool_t same_core(unsigned int cpua, unsigned int cpub)
> +{
> +    return same_socket(cpua, cpub) &&
> +           cpu_to_core(cpua) == cpu_to_core(cpub);
> +}
> +
> +static unsigned int
> +cpu_to_runqueue(struct csched2_private *prv, unsigned int cpu)
> +{
> +    struct csched2_runqueue_data *rqd;
> +    unsigned int rqi;
> +
> +    for ( rqi = 0; rqi < nr_cpu_ids; rqi++ )
> +    {
> +        unsigned int peer_cpu;
> +
> +        /*
> +         * As soon as we come across an uninitialized runqueue, use it.
> +         * In fact, either:
> +         *  - we are initializing the first cpu, and we assign it to
> +         *    runqueue 0. This is handy, especially if we are dealing
> +         *    with the boot cpu (if credit2 is the default scheduler),
> +         *    as we would not be able to use cpu_to_socket() and similar
> +         *    helpers anyway (they're result of which is not reliable yet);
> +         *  - we have gone through all the active runqueues, and have not
> +         *    found anyone whose cpus' topology matches the one we are
> +         *    dealing with, so activating a new runqueue is what we want.
> +         */
> +        if ( prv->rqd[rqi].id == -1 )
> +            break;
> +
> +        rqd = prv->rqd + rqi;
> +        BUG_ON(cpumask_empty(&rqd->active));
> +
> +        peer_cpu = cpumask_first(&rqd->active);
> +        BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID ||
> +               cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
> +
> +        if ( opt_runqueue == OPT_RUNQUEUE_ALL ||
> +             (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
> +             (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
> +             (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu)) )
> +            break;
> +    }
> +
> +    /* We really expect to be able to assign each cpu to a runqueue. */
> +    BUG_ON(rqi >= nr_cpu_ids);
> +
> +    return rqi;
> +}
> +
> +/* Find the domain with the highest weight. */
> +static void update_max_weight(struct csched2_runqueue_data *rqd, int new_weight,
> +                              int old_weight)
> +{
> +    /* Try to avoid brute-force search:
> +     * - If new_weight is larger, max_weigth <- new_weight
> +     * - If old_weight != max_weight, someone else is still max_weight
> +     *   (No action required)
> +     * - If old_weight == max_weight, brute-force search for max weight
> +     */
> +    if ( new_weight > rqd->max_weight )
> +    {
> +        rqd->max_weight = new_weight;
> +        SCHED_STAT_CRANK(upd_max_weight_quick);
> +    }
> +    else if ( old_weight == rqd->max_weight )
> +    {
> +        struct list_head *iter;
> +        int max_weight = 1;
> +
> +        list_for_each( iter, &rqd->svc )
> +        {
> +            struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, rqd_elem);
> +
> +            if ( svc->weight > max_weight )
> +                max_weight = svc->weight;
> +        }
> +
> +        rqd->max_weight = max_weight;
> +        SCHED_STAT_CRANK(upd_max_weight_full);
> +    }
> +
> +    if ( unlikely(tb_init_done) )
> +    {
> +        struct {
> +            unsigned rqi:16, max_weight:16;
> +        } d;
> +        d.rqi = rqd->id;
> +        d.max_weight = rqd->max_weight;
> +        __trace_var(TRC_CSCHED2_RUNQ_MAX_WEIGHT, 1,
> +                    sizeof(d),
> +                    (unsigned char *)&d);
> +    }
> +}
> +
> +/* Add and remove from runqueue assignment (not active run queue) */
> +static void
> +_runq_assign(struct csched2_vcpu *svc, struct csched2_runqueue_data *rqd)
> +{
> +
> +    svc->rqd = rqd;
> +    list_add_tail(&svc->rqd_elem, &svc->rqd->svc);
> +
> +    update_max_weight(svc->rqd, svc->weight, 0);
> +
> +    /* Expected new load based on adding this vcpu */
> +    rqd->b_avgload += svc->avgload;
> +
> +    if ( unlikely(tb_init_done) )
> +    {
> +        struct {
> +            unsigned vcpu:16, dom:16;
> +            unsigned rqi:16;
> +        } d;
> +        d.dom = svc->vcpu->domain->domain_id;
> +        d.vcpu = svc->vcpu->vcpu_id;
> +        d.rqi=rqd->id;
> +        __trace_var(TRC_CSCHED2_RUNQ_ASSIGN, 1,
> +                    sizeof(d),
> +                    (unsigned char *)&d);
> +    }
> +
>  }
>
> -/*
> - * Time-to-credit, credit-to-time.
> - *
> - * We keep track of the "residual" time to make sure that frequent short
> - * schedules still get accounted for in the end.
> - *
> - * FIXME: Do pre-calculated division?
> - */
> -static void t2c_update(struct csched2_runqueue_data *rqd, s_time_t time,
> -                          struct csched2_vcpu *svc)
> +static void
> +runq_assign(const struct scheduler *ops, struct vcpu *vc)
>  {
> -    uint64_t val = time * rqd->max_weight + svc->residual;
> +    struct csched2_vcpu *svc = vc->sched_priv;
>
> -    svc->residual = do_div(val, svc->weight);
> -    svc->credit -= val;
> +    ASSERT(svc->rqd == NULL);
> +
> +    _runq_assign(svc, c2rqd(ops, vc->processor));
>  }
>
> -static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct csched2_vcpu *svc)
> +static void
> +_runq_deassign(struct csched2_vcpu *svc)
>  {
> -    return credit * svc->weight / rqd->max_weight;
> -}
> +    struct csched2_runqueue_data *rqd = svc->rqd;
>
> -/*
> - * Runqueue related code
> - */
> +    ASSERT(!vcpu_on_runq(svc));
> +    ASSERT(!(svc->flags & CSFLAG_scheduled));
>
> -static inline int vcpu_on_runq(struct csched2_vcpu *svc)
> -{
> -    return !list_empty(&svc->runq_elem);
> +    list_del_init(&svc->rqd_elem);
> +    update_max_weight(rqd, 0, svc->weight);
> +
> +    /* Expected new load based on removing this vcpu */
> +    rqd->b_avgload = max_t(s_time_t, rqd->b_avgload - svc->avgload, 0);
> +
> +    svc->rqd = NULL;
>  }
>
> -static struct csched2_vcpu * runq_elem(struct list_head *elem)
> +static void
> +runq_deassign(const struct scheduler *ops, struct vcpu *vc)
>  {
> -    return list_entry(elem, struct csched2_vcpu, runq_elem);
> +    struct csched2_vcpu *svc = vc->sched_priv;
> +
> +    ASSERT(svc->rqd == c2rqd(ops, vc->processor));
> +
> +    _runq_deassign(svc);
>  }
>
>  /*
> @@ -1218,51 +1420,6 @@ void burn_credits(struct csched2_runqueue_data *rqd,
>      }
>  }
>
> -/* Find the domain with the highest weight. */
> -static void update_max_weight(struct csched2_runqueue_data *rqd, int new_weight,
> -                              int old_weight)
> -{
> -    /* Try to avoid brute-force search:
> -     * - If new_weight is larger, max_weigth <- new_weight
> -     * - If old_weight != max_weight, someone else is still max_weight
> -     *   (No action required)
> -     * - If old_weight == max_weight, brute-force search for max weight
> -     */
> -    if ( new_weight > rqd->max_weight )
> -    {
> -        rqd->max_weight = new_weight;
> -        SCHED_STAT_CRANK(upd_max_weight_quick);
> -    }
> -    else if ( old_weight == rqd->max_weight )
> -    {
> -        struct list_head *iter;
> -        int max_weight = 1;
> -
> -        list_for_each( iter, &rqd->svc )
> -        {
> -            struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, rqd_elem);
> -
> -            if ( svc->weight > max_weight )
> -                max_weight = svc->weight;
> -        }
> -
> -        rqd->max_weight = max_weight;
> -        SCHED_STAT_CRANK(upd_max_weight_full);
> -    }
> -
> -    if ( unlikely(tb_init_done) )
> -    {
> -        struct {
> -            unsigned rqi:16, max_weight:16;
> -        } d;
> -        d.rqi = rqd->id;
> -        d.max_weight = rqd->max_weight;
> -        __trace_var(TRC_CSCHED2_RUNQ_MAX_WEIGHT, 1,
> -                    sizeof(d),
> -                    (unsigned char *)&d);
> -    }
> -}
> -
>  #ifndef NDEBUG
>  static inline void
>  csched2_vcpu_check(struct vcpu *vc)
> @@ -1327,72 +1484,6 @@ csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
>      return svc;
>  }
>
> -/* Add and remove from runqueue assignment (not active run queue) */
> -static void
> -_runq_assign(struct csched2_vcpu *svc, struct csched2_runqueue_data *rqd)
> -{
> -
> -    svc->rqd = rqd;
> -    list_add_tail(&svc->rqd_elem, &svc->rqd->svc);
> -
> -    update_max_weight(svc->rqd, svc->weight, 0);
> -
> -    /* Expected new load based on adding this vcpu */
> -    rqd->b_avgload += svc->avgload;
> -
> -    if ( unlikely(tb_init_done) )
> -    {
> -        struct {
> -            unsigned vcpu:16, dom:16;
> -            unsigned rqi:16;
> -        } d;
> -        d.dom = svc->vcpu->domain->domain_id;
> -        d.vcpu = svc->vcpu->vcpu_id;
> -        d.rqi=rqd->id;
> -        __trace_var(TRC_CSCHED2_RUNQ_ASSIGN, 1,
> -                    sizeof(d),
> -                    (unsigned char *)&d);
> -    }
> -
> -}
> -
> -static void
> -runq_assign(const struct scheduler *ops, struct vcpu *vc)
> -{
> -    struct csched2_vcpu *svc = vc->sched_priv;
> -
> -    ASSERT(svc->rqd == NULL);
> -
> -    _runq_assign(svc, c2rqd(ops, vc->processor));
> -}
> -
> -static void
> -_runq_deassign(struct csched2_vcpu *svc)
> -{
> -    struct csched2_runqueue_data *rqd = svc->rqd;
> -
> -    ASSERT(!vcpu_on_runq(svc));
> -    ASSERT(!(svc->flags & CSFLAG_scheduled));
> -
> -    list_del_init(&svc->rqd_elem);
> -    update_max_weight(rqd, 0, svc->weight);
> -
> -    /* Expected new load based on removing this vcpu */
> -    rqd->b_avgload = max_t(s_time_t, rqd->b_avgload - svc->avgload, 0);
> -
> -    svc->rqd = NULL;
> -}
> -
> -static void
> -runq_deassign(const struct scheduler *ops, struct vcpu *vc)
> -{
> -    struct csched2_vcpu *svc = vc->sched_priv;
> -
> -    ASSERT(svc->rqd == c2rqd(ops, vc->processor));
> -
> -    _runq_deassign(svc);
> -}
> -
>  static void
>  csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
>  {
> @@ -2776,97 +2867,6 @@ csched2_dump(const struct scheduler *ops)
>  #undef cpustr
>  }
>
> -static void activate_runqueue(struct csched2_private *prv, int rqi)
> -{
> -    struct csched2_runqueue_data *rqd;
> -
> -    rqd = prv->rqd + rqi;
> -
> -    BUG_ON(!cpumask_empty(&rqd->active));
> -
> -    rqd->max_weight = 1;
> -    rqd->id = rqi;
> -    INIT_LIST_HEAD(&rqd->svc);
> -    INIT_LIST_HEAD(&rqd->runq);
> -    spin_lock_init(&rqd->lock);
> -
> -    __cpumask_set_cpu(rqi, &prv->active_queues);
> -}
> -
> -static void deactivate_runqueue(struct csched2_private *prv, int rqi)
> -{
> -    struct csched2_runqueue_data *rqd;
> -
> -    rqd = prv->rqd + rqi;
> -
> -    BUG_ON(!cpumask_empty(&rqd->active));
> -
> -    rqd->id = -1;
> -
> -    __cpumask_clear_cpu(rqi, &prv->active_queues);
> -}
> -
> -static inline bool_t same_node(unsigned int cpua, unsigned int cpub)
> -{
> -    return cpu_to_node(cpua) == cpu_to_node(cpub);
> -}
> -
> -static inline bool_t same_socket(unsigned int cpua, unsigned int cpub)
> -{
> -    return cpu_to_socket(cpua) == cpu_to_socket(cpub);
> -}
> -
> -static inline bool_t same_core(unsigned int cpua, unsigned int cpub)
> -{
> -    return same_socket(cpua, cpub) &&
> -           cpu_to_core(cpua) == cpu_to_core(cpub);
> -}
> -
> -static unsigned int
> -cpu_to_runqueue(struct csched2_private *prv, unsigned int cpu)
> -{
> -    struct csched2_runqueue_data *rqd;
> -    unsigned int rqi;
> -
> -    for ( rqi = 0; rqi < nr_cpu_ids; rqi++ )
> -    {
> -        unsigned int peer_cpu;
> -
> -        /*
> -         * As soon as we come across an uninitialized runqueue, use it.
> -         * In fact, either:
> -         *  - we are initializing the first cpu, and we assign it to
> -         *    runqueue 0. This is handy, especially if we are dealing
> -         *    with the boot cpu (if credit2 is the default scheduler),
> -         *    as we would not be able to use cpu_to_socket() and similar
> -         *    helpers anyway (they're result of which is not reliable yet);
> -         *  - we have gone through all the active runqueues, and have not
> -         *    found anyone whose cpus' topology matches the one we are
> -         *    dealing with, so activating a new runqueue is what we want.
> -         */
> -        if ( prv->rqd[rqi].id == -1 )
> -            break;
> -
> -        rqd = prv->rqd + rqi;
> -        BUG_ON(cpumask_empty(&rqd->active));
> -
> -        peer_cpu = cpumask_first(&rqd->active);
> -        BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID ||
> -               cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
> -
> -        if ( opt_runqueue == OPT_RUNQUEUE_ALL ||
> -             (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
> -             (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
> -             (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu)) )
> -            break;
> -    }
> -
> -    /* We really expect to be able to assign each cpu to a runqueue. */
> -    BUG_ON(rqi >= nr_cpu_ids);
> -
> -    return rqi;
> -}
> -
>  /* Returns the ID of the runqueue the cpu is assigned to. */
>  static unsigned
>  init_pdata(struct csched2_private *prv, unsigned int cpu)
>
>
Dario Faggioli Feb. 27, 2017, 6:25 p.m. UTC | #2
On Wed, 2017-02-15 at 14:42 +0000, George Dunlap wrote:
> Most of the motion makes sense, but moving the option parser along
> too
> seems a bit strange.  Wouldn't it make more sense to leave it with
> the
> other option parsers?  
>
Well, it's a runqueue related parameter, so it's runqueue related code,
and this is why I moved it.

_BUT_ after all, I think I agree with you. More specifically, I think
that either all the option parsers are in "sections" where they belong,
depending on the option they take care of, or they all live together.

So, yes, let's keep them together. I'll see about moving each one to a
more specific place in the file and, if I decide to go for it, send a
patch.

Thanks and Regards,
Dario
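
[Editorial note on another part of the moved code: the comment above
t2c_update() makes a subtle point: elapsed time is scaled by
max_weight/weight, and the division remainder is carried in svc->residual
so that many short scheduling slices do not each round down to a zero
charge. Below is a minimal, self-contained model of that accounting,
using hypothetical standalone types and plain / and % in place of Xen's
do_div(); it is a sketch of the idea, not the scheduler's actual code.]

    #include <stdint.h>
    #include <stdio.h>

    /* Standalone model of credit2's t2c_update(): charge 'time' units of
     * runtime against 'credit', scaled by max_weight/weight, carrying the
     * division remainder in 'residual' across calls so that frequent short
     * schedules are still fully accounted for. */
    struct vcpu_acct {
        int64_t credit;
        uint64_t residual;
        unsigned int weight;
    };

    static void t2c_update_model(struct vcpu_acct *v, uint64_t time,
                                 unsigned int max_weight)
    {
        uint64_t val = time * max_weight + v->residual;

        v->residual = val % v->weight;   /* remainder carried to the next call */
        v->credit  -= val / v->weight;   /* integer part charged now */
    }

    int main(void)
    {
        struct vcpu_acct v = { .credit = 1000, .residual = 0, .weight = 3 };

        /* Three 1-unit slices at max_weight == 1 should cost 3 * (1/3) = 1
         * credit in total; without the residual, each call would round the
         * charge down to 0. */
        for ( int i = 0; i < 3; i++ )
            t2c_update_model(&v, 1, 1);

        printf("credit=%ld residual=%lu\n",
               (long)v.credit, (unsigned long)v.residual);  /* credit=999 residual=0 */
        return 0;
    }

[Running the model charges exactly one credit for the three short slices,
which is the behaviour the "residual" comment in the patch describes.]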

Patch

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 4b4f4f8..addee7b 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -295,63 +295,6 @@  static int __read_mostly opt_overload_balance_tolerance = -3;
 integer_param("credit2_balance_over", opt_overload_balance_tolerance);
 
 /*
- * Runqueue organization.
- *
- * The various cpus are to be assigned each one to a runqueue, and we
- * want that to happen basing on topology. At the moment, it is possible
- * to choose to arrange runqueues to be:
- *
- * - per-core: meaning that there will be one runqueue per each physical
- *             core of the host. This will happen if the opt_runqueue
- *             parameter is set to 'core';
- *
- * - per-socket: meaning that there will be one runqueue per each physical
- *               socket (AKA package, which often, but not always, also
- *               matches a NUMA node) of the host; This will happen if
- *               the opt_runqueue parameter is set to 'socket';
- *
- * - per-node: meaning that there will be one runqueue per each physical
- *             NUMA node of the host. This will happen if the opt_runqueue
- *             parameter is set to 'node';
- *
- * - global: meaning that there will be only one runqueue to which all the
- *           (logical) processors of the host belong. This will happen if
- *           the opt_runqueue parameter is set to 'all'.
- *
- * Depending on the value of opt_runqueue, therefore, cpus that are part of
- * either the same physical core, the same physical socket, the same NUMA
- * node, or just all of them, will be put together to form runqueues.
- */
-#define OPT_RUNQUEUE_CORE   0
-#define OPT_RUNQUEUE_SOCKET 1
-#define OPT_RUNQUEUE_NODE   2
-#define OPT_RUNQUEUE_ALL    3
-static const char *const opt_runqueue_str[] = {
-    [OPT_RUNQUEUE_CORE] = "core",
-    [OPT_RUNQUEUE_SOCKET] = "socket",
-    [OPT_RUNQUEUE_NODE] = "node",
-    [OPT_RUNQUEUE_ALL] = "all"
-};
-static int __read_mostly opt_runqueue = OPT_RUNQUEUE_SOCKET;
-
-static void parse_credit2_runqueue(const char *s)
-{
-    unsigned int i;
-
-    for ( i = 0; i < ARRAY_SIZE(opt_runqueue_str); i++ )
-    {
-        if ( !strcmp(s, opt_runqueue_str[i]) )
-        {
-            opt_runqueue = i;
-            return;
-        }
-    }
-
-    printk("WARNING, unrecognized value of credit2_runqueue option!\n");
-}
-custom_param("credit2_runqueue", parse_credit2_runqueue);
-
-/*
  * Per-runqueue data
  */
 struct csched2_runqueue_data {
@@ -563,45 +506,304 @@  static int get_fallback_cpu(struct csched2_vcpu *svc)
         return cpumask_first(cpumask_scratch_cpu(cpu));
     }
 
-    ASSERT(!cpumask_empty(cpumask_scratch_cpu(cpu)));
-
-    return cpumask_first(cpumask_scratch_cpu(cpu));
+    ASSERT(!cpumask_empty(cpumask_scratch_cpu(cpu)));
+
+    return cpumask_first(cpumask_scratch_cpu(cpu));
+}
+
+/*
+ * Time-to-credit, credit-to-time.
+ *
+ * We keep track of the "residual" time to make sure that frequent short
+ * schedules still get accounted for in the end.
+ *
+ * FIXME: Do pre-calculated division?
+ */
+static void t2c_update(struct csched2_runqueue_data *rqd, s_time_t time,
+                          struct csched2_vcpu *svc)
+{
+    uint64_t val = time * rqd->max_weight + svc->residual;
+
+    svc->residual = do_div(val, svc->weight);
+    svc->credit -= val;
+}
+
+static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct csched2_vcpu *svc)
+{
+    return credit * svc->weight / rqd->max_weight;
+}
+
+/*
+ * Runqueue related code.
+ */
+
+/*
+ * Runqueue organization.
+ *
+ * The various cpus are to be assigned each one to a runqueue, and we
+ * want that to happen basing on topology. At the moment, it is possible
+ * to choose to arrange runqueues to be:
+ *
+ * - per-core: meaning that there will be one runqueue per each physical
+ *             core of the host. This will happen if the opt_runqueue
+ *             parameter is set to 'core';
+ *
+ * - per-socket: meaning that there will be one runqueue per each physical
+ *               socket (AKA package, which often, but not always, also
+ *               matches a NUMA node) of the host; This will happen if
+ *               the opt_runqueue parameter is set to 'socket';
+ *
+ * - per-node: meaning that there will be one runqueue per each physical
+ *             NUMA node of the host. This will happen if the opt_runqueue
+ *             parameter is set to 'node';
+ *
+ * - global: meaning that there will be only one runqueue to which all the
+ *           (logical) processors of the host belong. This will happen if
+ *           the opt_runqueue parameter is set to 'all'.
+ *
+ * Depending on the value of opt_runqueue, therefore, cpus that are part of
+ * either the same physical core, the same physical socket, the same NUMA
+ * node, or just all of them, will be put together to form runqueues.
+ */
+#define OPT_RUNQUEUE_CORE   0
+#define OPT_RUNQUEUE_SOCKET 1
+#define OPT_RUNQUEUE_NODE   2
+#define OPT_RUNQUEUE_ALL    3
+static const char *const opt_runqueue_str[] = {
+    [OPT_RUNQUEUE_CORE] = "core",
+    [OPT_RUNQUEUE_SOCKET] = "socket",
+    [OPT_RUNQUEUE_NODE] = "node",
+    [OPT_RUNQUEUE_ALL] = "all"
+};
+static int __read_mostly opt_runqueue = OPT_RUNQUEUE_SOCKET;
+
+static void parse_credit2_runqueue(const char *s)
+{
+    unsigned int i;
+
+    for ( i = 0; i < ARRAY_SIZE(opt_runqueue_str); i++ )
+    {
+        if ( !strcmp(s, opt_runqueue_str[i]) )
+        {
+            opt_runqueue = i;
+            return;
+        }
+    }
+
+    printk("WARNING, unrecognized value of credit2_runqueue option!\n");
+}
+custom_param("credit2_runqueue", parse_credit2_runqueue);
+
+static inline int vcpu_on_runq(struct csched2_vcpu *svc)
+{
+    return !list_empty(&svc->runq_elem);
+}
+
+static struct csched2_vcpu * runq_elem(struct list_head *elem)
+{
+    return list_entry(elem, struct csched2_vcpu, runq_elem);
+}
+
+static void activate_runqueue(struct csched2_private *prv, int rqi)
+{
+    struct csched2_runqueue_data *rqd;
+
+    rqd = prv->rqd + rqi;
+
+    BUG_ON(!cpumask_empty(&rqd->active));
+
+    rqd->max_weight = 1;
+    rqd->id = rqi;
+    INIT_LIST_HEAD(&rqd->svc);
+    INIT_LIST_HEAD(&rqd->runq);
+    spin_lock_init(&rqd->lock);
+
+    __cpumask_set_cpu(rqi, &prv->active_queues);
+}
+
+static void deactivate_runqueue(struct csched2_private *prv, int rqi)
+{
+    struct csched2_runqueue_data *rqd;
+
+    rqd = prv->rqd + rqi;
+
+    BUG_ON(!cpumask_empty(&rqd->active));
+
+    rqd->id = -1;
+
+    __cpumask_clear_cpu(rqi, &prv->active_queues);
+}
+
+static inline bool_t same_node(unsigned int cpua, unsigned int cpub)
+{
+    return cpu_to_node(cpua) == cpu_to_node(cpub);
+}
+
+static inline bool_t same_socket(unsigned int cpua, unsigned int cpub)
+{
+    return cpu_to_socket(cpua) == cpu_to_socket(cpub);
+}
+
+static inline bool_t same_core(unsigned int cpua, unsigned int cpub)
+{
+    return same_socket(cpua, cpub) &&
+           cpu_to_core(cpua) == cpu_to_core(cpub);
+}
+
+static unsigned int
+cpu_to_runqueue(struct csched2_private *prv, unsigned int cpu)
+{
+    struct csched2_runqueue_data *rqd;
+    unsigned int rqi;
+
+    for ( rqi = 0; rqi < nr_cpu_ids; rqi++ )
+    {
+        unsigned int peer_cpu;
+
+        /*
+         * As soon as we come across an uninitialized runqueue, use it.
+         * In fact, either:
+         *  - we are initializing the first cpu, and we assign it to
+         *    runqueue 0. This is handy, especially if we are dealing
+         *    with the boot cpu (if credit2 is the default scheduler),
+         *    as we would not be able to use cpu_to_socket() and similar
+         *    helpers anyway (they're result of which is not reliable yet);
+         *  - we have gone through all the active runqueues, and have not
+         *    found anyone whose cpus' topology matches the one we are
+         *    dealing with, so activating a new runqueue is what we want.
+         */
+        if ( prv->rqd[rqi].id == -1 )
+            break;
+
+        rqd = prv->rqd + rqi;
+        BUG_ON(cpumask_empty(&rqd->active));
+
+        peer_cpu = cpumask_first(&rqd->active);
+        BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID ||
+               cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
+
+        if ( opt_runqueue == OPT_RUNQUEUE_ALL ||
+             (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
+             (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
+             (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu)) )
+            break;
+    }
+
+    /* We really expect to be able to assign each cpu to a runqueue. */
+    BUG_ON(rqi >= nr_cpu_ids);
+
+    return rqi;
+}
+
+/* Find the domain with the highest weight. */
+static void update_max_weight(struct csched2_runqueue_data *rqd, int new_weight,
+                              int old_weight)
+{
+    /* Try to avoid brute-force search:
+     * - If new_weight is larger, max_weigth <- new_weight
+     * - If old_weight != max_weight, someone else is still max_weight
+     *   (No action required)
+     * - If old_weight == max_weight, brute-force search for max weight
+     */
+    if ( new_weight > rqd->max_weight )
+    {
+        rqd->max_weight = new_weight;
+        SCHED_STAT_CRANK(upd_max_weight_quick);
+    }
+    else if ( old_weight == rqd->max_weight )
+    {
+        struct list_head *iter;
+        int max_weight = 1;
+
+        list_for_each( iter, &rqd->svc )
+        {
+            struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, rqd_elem);
+
+            if ( svc->weight > max_weight )
+                max_weight = svc->weight;
+        }
+
+        rqd->max_weight = max_weight;
+        SCHED_STAT_CRANK(upd_max_weight_full);
+    }
+
+    if ( unlikely(tb_init_done) )
+    {
+        struct {
+            unsigned rqi:16, max_weight:16;
+        } d;
+        d.rqi = rqd->id;
+        d.max_weight = rqd->max_weight;
+        __trace_var(TRC_CSCHED2_RUNQ_MAX_WEIGHT, 1,
+                    sizeof(d),
+                    (unsigned char *)&d);
+    }
+}
+
+/* Add and remove from runqueue assignment (not active run queue) */
+static void
+_runq_assign(struct csched2_vcpu *svc, struct csched2_runqueue_data *rqd)
+{
+
+    svc->rqd = rqd;
+    list_add_tail(&svc->rqd_elem, &svc->rqd->svc);
+
+    update_max_weight(svc->rqd, svc->weight, 0);
+
+    /* Expected new load based on adding this vcpu */
+    rqd->b_avgload += svc->avgload;
+
+    if ( unlikely(tb_init_done) )
+    {
+        struct {
+            unsigned vcpu:16, dom:16;
+            unsigned rqi:16;
+        } d;
+        d.dom = svc->vcpu->domain->domain_id;
+        d.vcpu = svc->vcpu->vcpu_id;
+        d.rqi=rqd->id;
+        __trace_var(TRC_CSCHED2_RUNQ_ASSIGN, 1,
+                    sizeof(d),
+                    (unsigned char *)&d);
+    }
+
 }
 
-/*
- * Time-to-credit, credit-to-time.
- * 
- * We keep track of the "residual" time to make sure that frequent short
- * schedules still get accounted for in the end.
- *
- * FIXME: Do pre-calculated division?
- */
-static void t2c_update(struct csched2_runqueue_data *rqd, s_time_t time,
-                          struct csched2_vcpu *svc)
+static void
+runq_assign(const struct scheduler *ops, struct vcpu *vc)
 {
-    uint64_t val = time * rqd->max_weight + svc->residual;
+    struct csched2_vcpu *svc = vc->sched_priv;
 
-    svc->residual = do_div(val, svc->weight);
-    svc->credit -= val;
+    ASSERT(svc->rqd == NULL);
+
+    _runq_assign(svc, c2rqd(ops, vc->processor));
 }
 
-static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct csched2_vcpu *svc)
+static void
+_runq_deassign(struct csched2_vcpu *svc)
 {
-    return credit * svc->weight / rqd->max_weight;
-}
+    struct csched2_runqueue_data *rqd = svc->rqd;
 
-/*
- * Runqueue related code
- */
+    ASSERT(!vcpu_on_runq(svc));
+    ASSERT(!(svc->flags & CSFLAG_scheduled));
 
-static inline int vcpu_on_runq(struct csched2_vcpu *svc)
-{
-    return !list_empty(&svc->runq_elem);
+    list_del_init(&svc->rqd_elem);
+    update_max_weight(rqd, 0, svc->weight);
+
+    /* Expected new load based on removing this vcpu */
+    rqd->b_avgload = max_t(s_time_t, rqd->b_avgload - svc->avgload, 0);
+
+    svc->rqd = NULL;
 }
 
-static struct csched2_vcpu * runq_elem(struct list_head *elem)
+static void
+runq_deassign(const struct scheduler *ops, struct vcpu *vc)
 {
-    return list_entry(elem, struct csched2_vcpu, runq_elem);
+    struct csched2_vcpu *svc = vc->sched_priv;
+
+    ASSERT(svc->rqd == c2rqd(ops, vc->processor));
+
+    _runq_deassign(svc);
 }
 
 /*
@@ -1218,51 +1420,6 @@  void burn_credits(struct csched2_runqueue_data *rqd,
     }
 }
 
-/* Find the domain with the highest weight. */
-static void update_max_weight(struct csched2_runqueue_data *rqd, int new_weight,
-                              int old_weight)
-{
-    /* Try to avoid brute-force search:
-     * - If new_weight is larger, max_weigth <- new_weight
-     * - If old_weight != max_weight, someone else is still max_weight
-     *   (No action required)
-     * - If old_weight == max_weight, brute-force search for max weight
-     */
-    if ( new_weight > rqd->max_weight )
-    {
-        rqd->max_weight = new_weight;
-        SCHED_STAT_CRANK(upd_max_weight_quick);
-    }
-    else if ( old_weight == rqd->max_weight )
-    {
-        struct list_head *iter;
-        int max_weight = 1;
-
-        list_for_each( iter, &rqd->svc )
-        {
-            struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, rqd_elem);
-
-            if ( svc->weight > max_weight )
-                max_weight = svc->weight;
-        }
-
-        rqd->max_weight = max_weight;
-        SCHED_STAT_CRANK(upd_max_weight_full);
-    }
-
-    if ( unlikely(tb_init_done) )
-    {
-        struct {
-            unsigned rqi:16, max_weight:16;
-        } d;
-        d.rqi = rqd->id;
-        d.max_weight = rqd->max_weight;
-        __trace_var(TRC_CSCHED2_RUNQ_MAX_WEIGHT, 1,
-                    sizeof(d),
-                    (unsigned char *)&d);
-    }
-}
-
 #ifndef NDEBUG
 static inline void
 csched2_vcpu_check(struct vcpu *vc)
@@ -1327,72 +1484,6 @@  csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
     return svc;
 }
 
-/* Add and remove from runqueue assignment (not active run queue) */
-static void
-_runq_assign(struct csched2_vcpu *svc, struct csched2_runqueue_data *rqd)
-{
-
-    svc->rqd = rqd;
-    list_add_tail(&svc->rqd_elem, &svc->rqd->svc);
-
-    update_max_weight(svc->rqd, svc->weight, 0);
-
-    /* Expected new load based on adding this vcpu */
-    rqd->b_avgload += svc->avgload;
-
-    if ( unlikely(tb_init_done) )
-    {
-        struct {
-            unsigned vcpu:16, dom:16;
-            unsigned rqi:16;
-        } d;
-        d.dom = svc->vcpu->domain->domain_id;
-        d.vcpu = svc->vcpu->vcpu_id;
-        d.rqi=rqd->id;
-        __trace_var(TRC_CSCHED2_RUNQ_ASSIGN, 1,
-                    sizeof(d),
-                    (unsigned char *)&d);
-    }
-
-}
-
-static void
-runq_assign(const struct scheduler *ops, struct vcpu *vc)
-{
-    struct csched2_vcpu *svc = vc->sched_priv;
-
-    ASSERT(svc->rqd == NULL);
-
-    _runq_assign(svc, c2rqd(ops, vc->processor));
-}
-
-static void
-_runq_deassign(struct csched2_vcpu *svc)
-{
-    struct csched2_runqueue_data *rqd = svc->rqd;
-
-    ASSERT(!vcpu_on_runq(svc));
-    ASSERT(!(svc->flags & CSFLAG_scheduled));
-
-    list_del_init(&svc->rqd_elem);
-    update_max_weight(rqd, 0, svc->weight);
-
-    /* Expected new load based on removing this vcpu */
-    rqd->b_avgload = max_t(s_time_t, rqd->b_avgload - svc->avgload, 0);
-
-    svc->rqd = NULL;
-}
-
-static void
-runq_deassign(const struct scheduler *ops, struct vcpu *vc)
-{
-    struct csched2_vcpu *svc = vc->sched_priv;
-
-    ASSERT(svc->rqd == c2rqd(ops, vc->processor));
-
-    _runq_deassign(svc);
-}
-
 static void
 csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
 {
@@ -2776,97 +2867,6 @@  csched2_dump(const struct scheduler *ops)
 #undef cpustr
 }
 
-static void activate_runqueue(struct csched2_private *prv, int rqi)
-{
-    struct csched2_runqueue_data *rqd;
-
-    rqd = prv->rqd + rqi;
-
-    BUG_ON(!cpumask_empty(&rqd->active));
-
-    rqd->max_weight = 1;
-    rqd->id = rqi;
-    INIT_LIST_HEAD(&rqd->svc);
-    INIT_LIST_HEAD(&rqd->runq);
-    spin_lock_init(&rqd->lock);
-
-    __cpumask_set_cpu(rqi, &prv->active_queues);
-}
-
-static void deactivate_runqueue(struct csched2_private *prv, int rqi)
-{
-    struct csched2_runqueue_data *rqd;
-
-    rqd = prv->rqd + rqi;
-
-    BUG_ON(!cpumask_empty(&rqd->active));
-    
-    rqd->id = -1;
-
-    __cpumask_clear_cpu(rqi, &prv->active_queues);
-}
-
-static inline bool_t same_node(unsigned int cpua, unsigned int cpub)
-{
-    return cpu_to_node(cpua) == cpu_to_node(cpub);
-}
-
-static inline bool_t same_socket(unsigned int cpua, unsigned int cpub)
-{
-    return cpu_to_socket(cpua) == cpu_to_socket(cpub);
-}
-
-static inline bool_t same_core(unsigned int cpua, unsigned int cpub)
-{
-    return same_socket(cpua, cpub) &&
-           cpu_to_core(cpua) == cpu_to_core(cpub);
-}
-
-static unsigned int
-cpu_to_runqueue(struct csched2_private *prv, unsigned int cpu)
-{
-    struct csched2_runqueue_data *rqd;
-    unsigned int rqi;
-
-    for ( rqi = 0; rqi < nr_cpu_ids; rqi++ )
-    {
-        unsigned int peer_cpu;
-
-        /*
-         * As soon as we come across an uninitialized runqueue, use it.
-         * In fact, either:
-         *  - we are initializing the first cpu, and we assign it to
-         *    runqueue 0. This is handy, especially if we are dealing
-         *    with the boot cpu (if credit2 is the default scheduler),
-         *    as we would not be able to use cpu_to_socket() and similar
-         *    helpers anyway (they're result of which is not reliable yet);
-         *  - we have gone through all the active runqueues, and have not
-         *    found anyone whose cpus' topology matches the one we are
-         *    dealing with, so activating a new runqueue is what we want.
-         */
-        if ( prv->rqd[rqi].id == -1 )
-            break;
-
-        rqd = prv->rqd + rqi;
-        BUG_ON(cpumask_empty(&rqd->active));
-
-        peer_cpu = cpumask_first(&rqd->active);
-        BUG_ON(cpu_to_socket(cpu) == XEN_INVALID_SOCKET_ID ||
-               cpu_to_socket(peer_cpu) == XEN_INVALID_SOCKET_ID);
-
-        if ( opt_runqueue == OPT_RUNQUEUE_ALL ||
-             (opt_runqueue == OPT_RUNQUEUE_CORE && same_core(peer_cpu, cpu)) ||
-             (opt_runqueue == OPT_RUNQUEUE_SOCKET && same_socket(peer_cpu, cpu)) ||
-             (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu)) )
-            break;
-    }
-
-    /* We really expect to be able to assign each cpu to a runqueue. */
-    BUG_ON(rqi >= nr_cpu_ids);
-
-    return rqi;
-}
-
 /* Returns the ID of the runqueue the cpu is assigned to. */
 static unsigned
 init_pdata(struct csched2_private *prv, unsigned int cpu)