@@ -266,7 +266,7 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
return -EADDRNOTAVAIL;
- ret = schedule_cpu_switch(cpu, c);
+ ret = schedule_cpu_add(cpu, c);
if ( ret )
return ret;
@@ -316,7 +316,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
*/
if ( !ret )
{
- ret = schedule_cpu_switch(cpu, NULL);
+ ret = schedule_cpu_rm(cpu);
if ( ret )
cpumask_clear_cpu(cpu, &cpupool_free_cpus);
else
@@ -86,15 +86,6 @@ extern const struct scheduler *__start_schedulers_array[], *__end_schedulers_arr
static struct scheduler __read_mostly ops;
-static spinlock_t *
-sched_idle_switch_sched(struct scheduler *new_ops, unsigned int cpu,
- void *pdata, void *vdata)
-{
- sched_idle_unit(cpu)->priv = NULL;
-
- return &sched_free_cpu_lock;
-}
-
static struct sched_resource *
sched_idle_res_pick(const struct scheduler *ops, const struct sched_unit *unit)
{
@@ -134,7 +125,6 @@ static struct scheduler sched_idle_ops = {
.alloc_vdata = sched_idle_alloc_vdata,
.free_vdata = sched_idle_free_vdata,
- .switch_sched = sched_idle_switch_sched,
};
static inline struct vcpu *unit2vcpu_cpu(struct sched_unit *unit,
@@ -2509,36 +2499,22 @@ void __init scheduler_init(void)
}
/*
- * Move a pCPU outside of the influence of the scheduler of its current
- * cpupool, or subject it to the scheduler of a new cpupool.
- *
- * For the pCPUs that are removed from their cpupool, their scheduler becomes
- * &sched_idle_ops (the idle scheduler).
+ * Move a pCPU from free cpus (running the idle scheduler) to a cpupool
+ * using any "real" scheduler.
+ * The cpu is still marked as "free" and not yet valid for its cpupool.
*/
-int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
+int schedule_cpu_add(unsigned int cpu, struct cpupool *c)
{
struct vcpu *idle;
- void *ppriv, *ppriv_old, *vpriv, *vpriv_old;
- struct scheduler *old_ops = get_sched_res(cpu)->scheduler;
- struct scheduler *new_ops = (c == NULL) ? &sched_idle_ops : c->sched;
+ void *ppriv, *vpriv;
+ struct scheduler *new_ops = c->sched;
struct sched_resource *sd = get_sched_res(cpu);
- struct cpupool *old_pool = sd->cpupool;
spinlock_t *old_lock, *new_lock;
unsigned long flags;
- /*
- * pCPUs only move from a valid cpupool to free (i.e., out of any pool),
- * or from free to a valid cpupool. In the former case (which happens when
- * c is NULL), we want the CPU to have been marked as free already, as
- * well as to not be valid for the source pool any longer, when we get to
- * here. In the latter case (which happens when c is a valid cpupool), we
- * want the CPU to still be marked as free, as well as to not yet be valid
- * for the destination pool.
- */
- ASSERT(c != old_pool && (c != NULL || old_pool != NULL));
ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
- ASSERT((c == NULL && !cpumask_test_cpu(cpu, old_pool->cpu_valid)) ||
- (c != NULL && !cpumask_test_cpu(cpu, c->cpu_valid)));
+ ASSERT(!cpumask_test_cpu(cpu, c->cpu_valid));
+ ASSERT(get_sched_res(cpu)->cpupool == NULL);
/*
* To setup the cpu for the new scheduler we need:
@@ -2563,52 +2539,91 @@ int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
return -ENOMEM;
}
- sched_do_tick_suspend(old_ops, cpu);
-
/*
- * The actual switch, including (if necessary) the rerouting of the
- * scheduler lock to whatever new_ops prefers, needs to happen in one
- * critical section, protected by old_ops' lock, or races are possible.
- * It is, in fact, the lock of another scheduler that we are taking (the
- * scheduler of the cpupool that cpu still belongs to). But that is ok
- * as, anyone trying to schedule on this cpu will spin until when we
- * release that lock (bottom of this function). When he'll get the lock
- * --thanks to the loop inside *_schedule_lock() functions-- he'll notice
- * that the lock itself changed, and retry acquiring the new one (which
- * will be the correct, remapped one, at that point).
+ * The actual switch, including the rerouting of the scheduler lock to
+ * whatever new_ops prefers, needs to happen in one critical section,
+ * protected by old_ops' lock, or races are possible.
+ * It is, in fact, the lock of the idle scheduler that we are taking.
+ * But that is ok, as anyone trying to schedule on this cpu will spin
+ * until we release that lock (bottom of this function). When they get the
+ * lock --thanks to the loop inside the *_schedule_lock() functions-- they
+ * will notice that the lock itself changed, and retry acquiring the new
+ * one (which will be the correct, remapped one, at that point).
*/
old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
- vpriv_old = idle->sched_unit->priv;
- ppriv_old = sd->sched_priv;
new_lock = sched_switch_sched(new_ops, cpu, ppriv, vpriv);
sd->scheduler = new_ops;
sd->sched_priv = ppriv;
/*
- * The data above is protected under new_lock, which may be unlocked.
- * Another CPU can take new_lock as soon as sd->schedule_lock is visible,
- * and must observe all prior initialisation.
+ * Reroute the lock to the per-pCPU lock as the /last/ thing. In fact,
+ * if it is free (and it can be), we want anyone that manages to take
+ * it to find all the initializations we have done above in place.
*/
smp_wmb();
sd->schedule_lock = new_lock;
- /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
+ /* _Not_ pcpu_schedule_unlock(): schedule_lock has changed! */
spin_unlock_irqrestore(old_lock, flags);
sched_do_tick_resume(new_ops, cpu);
+ sd->granularity = c->granularity;
+ sd->cpupool = c;
+ /* The cpu is added to a pool, trigger it to go pick up some work */
+ cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+
+ return 0;
+}
+
+/*
+ * Remove a pCPU from its cpupool. Its scheduler becomes &sched_idle_ops
+ * (the idle scheduler).
+ * The cpu is already marked as "free" and not valid any longer for its
+ * cpupool.
+ */
+int schedule_cpu_rm(unsigned int cpu)
+{
+ struct vcpu *idle;
+ void *ppriv_old, *vpriv_old;
+ struct sched_resource *sd = get_sched_res(cpu);
+ struct scheduler *old_ops = sd->scheduler;
+ spinlock_t *old_lock;
+ unsigned long flags;
+
+ ASSERT(sd->cpupool != NULL);
+ ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
+ ASSERT(!cpumask_test_cpu(cpu, sd->cpupool->cpu_valid));
+
+ idle = idle_vcpu[cpu];
+
+ sched_do_tick_suspend(old_ops, cpu);
+
+ /* See comment in schedule_cpu_add() regarding lock switching. */
+ old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
+
+ vpriv_old = idle->sched_unit->priv;
+ ppriv_old = sd->sched_priv;
+
+ idle->sched_unit->priv = NULL;
+ sd->scheduler = &sched_idle_ops;
+ sd->sched_priv = NULL;
+
+ smp_mb();
+ sd->schedule_lock = &sched_free_cpu_lock;
+
+ /* _Not_ pcpu_schedule_unlock(): schedule_lock has changed! */
+ spin_unlock_irqrestore(old_lock, flags);
+
sched_deinit_pdata(old_ops, ppriv_old, cpu);
sched_free_vdata(old_ops, vpriv_old);
sched_free_pdata(old_ops, ppriv_old, cpu);
- get_sched_res(cpu)->granularity = c ? c->granularity : 1;
- get_sched_res(cpu)->cpupool = c;
- /* When a cpu is added to a pool, trigger it to go pick up some work */
- if ( c != NULL )
- cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+ sd->granularity = 1;
+ sd->cpupool = NULL;
return 0;
}
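
The lock hand-over performed by both schedule_cpu_add() and schedule_cpu_rm() is the subtle part of this patch: the per-CPU data is republished while still holding the CPU's current lock, a barrier orders those stores before the new lock pointer becomes visible, and any waiter re-checks the pointer after acquiring it, as the *_schedule_lock() helpers do. The standalone C sketch below is illustrative only (not Xen code; cpu_state, cpu_lock_acquire() and cpu_switch_to_free() are made-up names, and pthread mutexes stand in for Xen's spinlocks):

/* Illustrative sketch only -- not part of this patch. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct cpu_state {
    _Atomic(pthread_mutex_t *) schedule_lock;  /* lock currently covering this CPU */
    const char *scheduler;                     /* stand-in for sd->scheduler */
};

static pthread_mutex_t real_sched_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t free_cpu_lock   = PTHREAD_MUTEX_INITIALIZER;

/* Mimics pcpu_schedule_lock(): retry until the lock taken is still the current one. */
static pthread_mutex_t *cpu_lock_acquire(struct cpu_state *sd)
{
    for ( ;; )
    {
        pthread_mutex_t *lock = atomic_load(&sd->schedule_lock);

        pthread_mutex_lock(lock);
        if ( lock == atomic_load(&sd->schedule_lock) )
            return lock;                /* still the right lock, keep it */
        pthread_mutex_unlock(lock);     /* lock was repointed meanwhile, retry */
    }
}

/* Rough analogue of schedule_cpu_rm(): move the CPU back to the "idle" setup. */
static void cpu_switch_to_free(struct cpu_state *sd)
{
    pthread_mutex_t *old_lock = cpu_lock_acquire(sd);

    sd->scheduler = "idle";
    /*
     * Release ordering: the data written above must be visible before the
     * new lock pointer is, mirroring the smp_wmb()/smp_mb() in the patch.
     */
    atomic_store_explicit(&sd->schedule_lock, &free_cpu_lock,
                          memory_order_release);
    pthread_mutex_unlock(old_lock);     /* unlock the *old* lock, as above */
}

int main(void)
{
    struct cpu_state sd = { .scheduler = "credit2" };

    atomic_init(&sd.schedule_lock, &real_sched_lock);
    cpu_switch_to_free(&sd);
    printf("scheduler now: %s\n", sd.scheduler);
    return 0;
}
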
@@ -918,7 +918,8 @@ struct scheduler;
struct scheduler *scheduler_get_default(void);
struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr);
void scheduler_free(struct scheduler *sched);
-int schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
+int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
+int schedule_cpu_rm(unsigned int cpu);
void vcpu_set_periodic_timer(struct vcpu *v, s_time_t value);
int cpu_disable_scheduler(unsigned int cpu);
/* We need it in dom0_setup_vcpu */
Instead of letting schedule_cpu_switch() handle moving cpus from and to
cpupools, split it into schedule_cpu_add() and schedule_cpu_rm(). This
will allow us to drop allocating/freeing scheduler data for free cpus
as the idle scheduler doesn't need such data.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V1: new patch
---
 xen/common/cpupool.c    |   4 +-
 xen/common/schedule.c   | 125 +++++++++++++++++++++++++++---------------------
 xen/include/xen/sched.h |   3 +-
 3 files changed, 74 insertions(+), 58 deletions(-)
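
As a usage note, the split keeps the cpupool-side state transitions outside the scheduler: a cpu goes through schedule_cpu_add() while still marked free and not yet valid for the target pool, and through schedule_cpu_rm() only after it has been marked free and dropped from the pool's valid mask. The sketch below is illustrative only (pool_assign()/pool_unassign() are made-up stand-ins for the cpupool.c callers, and plain bitmasks model the cpumasks); it spells out the ordering the new ASSERTs encode:

/* Illustrative sketch only -- not part of this patch. */
#include <assert.h>
#include <stdio.h>

struct pool {
    unsigned int cpu_valid;     /* bit n set: cpu n schedules in this pool */
};

static unsigned int free_cpus;  /* bit n set: cpu n is outside any pool */

/* cpupool_assign_cpu_locked() analogue: free cpu -> pool. */
static void pool_assign(struct pool *p, unsigned int cpu)
{
    /* Preconditions checked by schedule_cpu_add(). */
    assert(free_cpus & (1u << cpu));
    assert(!(p->cpu_valid & (1u << cpu)));

    /* schedule_cpu_add(cpu, p) would run here. */

    /* Only afterwards does the cpu become valid for the pool. */
    p->cpu_valid |= 1u << cpu;
    free_cpus &= ~(1u << cpu);
}

/* cpupool_unassign_cpu_finish() analogue: pool -> free cpu. */
static void pool_unassign(struct pool *p, unsigned int cpu)
{
    /* The cpu is marked free and invalid for the pool before the call. */
    free_cpus |= 1u << cpu;
    p->cpu_valid &= ~(1u << cpu);

    /* Preconditions checked by schedule_cpu_rm(). */
    assert(free_cpus & (1u << cpu));
    assert(!(p->cpu_valid & (1u << cpu)));

    /* schedule_cpu_rm(cpu) would run here. */
}

int main(void)
{
    struct pool pool0 = { 0 };

    free_cpus = 1u << 3;        /* cpu 3 starts out free */
    pool_assign(&pool0, 3);
    pool_unassign(&pool0, 3);
    printf("cpu 3 free again: %s\n", (free_cpus & (1u << 3)) ? "yes" : "no");
    return 0;
}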