@@ -560,33 +560,6 @@ static void vcpu_move_locked(struct vcpu *v, unsigned int new_cpu)
v->processor = new_cpu;
}
-/*
- * Move a vcpu from its current processor to a target new processor,
- * without asking the scheduler to do any placement. This is intended
- * for being called from special contexts, where things are quiet
- * enough that no contention is supposed to happen (i.e., during
- * shutdown or software suspend, like ACPI S3).
- */
-static void vcpu_move_nosched(struct vcpu *v, unsigned int new_cpu)
-{
- unsigned long flags;
- spinlock_t *lock, *new_lock;
-
- ASSERT(system_state == SYS_STATE_suspend);
- ASSERT(!vcpu_runnable(v) && (atomic_read(&v->pause_count) ||
- atomic_read(&v->domain->pause_count)));
-
- lock = per_cpu(schedule_data, v->processor).schedule_lock;
- new_lock = per_cpu(schedule_data, new_cpu).schedule_lock;
-
- sched_spin_lock_double(lock, new_lock, &flags);
- ASSERT(new_cpu != v->processor);
- vcpu_move_locked(v, new_cpu);
- sched_spin_unlock_double(lock, new_lock, flags);
-
- sched_move_irqs(v);
-}
-
/*
* Initiating migration
*
@@ -735,31 +708,36 @@ void restore_vcpu_affinity(struct domain *d)
ASSERT(!vcpu_runnable(v));
- lock = vcpu_schedule_lock_irq(v);
-
- if ( v->affinity_broken )
- {
- sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
- v->affinity_broken = 0;
-
- }
-
/*
- * During suspend (in cpu_disable_scheduler()), we moved every vCPU
- * to BSP (which, as of now, is pCPU 0), as a temporary measure to
- * allow the nonboot processors to have their data structure freed
- * and go to sleep. But nothing guardantees that the BSP is a valid
- * pCPU for a particular domain.
+ * Re-assign the initial processor, as after resume there is no
+ * guarantee the old processor has come back to life again.
*
* Therefore, here, before actually unpausing the domains, we should
* set v->processor of each of their vCPUs to something that will
* make sense for the scheduler of the cpupool in which they are in.
*/
cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
- cpupool_domain_cpumask(v->domain));
- v->processor = cpumask_any(cpumask_scratch_cpu(cpu));
+ cpupool_domain_cpumask(d));
+ if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
+ {
+ if ( v->affinity_broken )
+ {
+ sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
+ v->affinity_broken = 0;
+ cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+ cpupool_domain_cpumask(d));
+ }
- spin_unlock_irq(lock);
+ if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
+ {
+ printk(XENLOG_DEBUG "Breaking affinity for %pv\n", v);
+ sched_set_affinity(v, &cpumask_all, NULL);
+ cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+ cpupool_domain_cpumask(d));
+ }
+ }
+
+ v->processor = cpumask_any(cpumask_scratch_cpu(cpu));
lock = vcpu_schedule_lock_irq(v);
v->processor = SCHED_OP(vcpu_scheduler(v), pick_cpu, v);
@@ -783,7 +761,6 @@ int cpu_disable_scheduler(unsigned int cpu)
struct vcpu *v;
struct cpupool *c;
cpumask_t online_affinity;
- unsigned int new_cpu;
int ret = 0;
c = per_cpu(cpupool, cpu);
@@ -809,14 +786,7 @@ int cpu_disable_scheduler(unsigned int cpu)
break;
}
- if (system_state == SYS_STATE_suspend)
- {
- cpumask_copy(v->cpu_hard_affinity_saved,
- v->cpu_hard_affinity);
- v->affinity_broken = 1;
- }
- else
- printk(XENLOG_DEBUG "Breaking affinity for %pv\n", v);
+ printk(XENLOG_DEBUG "Breaking affinity for %pv\n", v);
sched_set_affinity(v, &cpumask_all, NULL);
}
@@ -828,60 +798,26 @@ int cpu_disable_scheduler(unsigned int cpu)
continue;
}
- /* If it is on this cpu, we must send it away. */
- if ( unlikely(system_state == SYS_STATE_suspend) )
- {
- vcpu_schedule_unlock_irqrestore(lock, flags, v);
-
- /*
- * If we are doing a shutdown/suspend, it is not necessary to
- * ask the scheduler to chime in. In fact:
- * * there is no reason for it: the end result we are after
- * is just 'all the vcpus on the boot pcpu, and no vcpu
- * anywhere else', so let's just go for it;
- * * it's wrong, for cpupools with only non-boot pcpus, as
- * the scheduler would always fail to send the vcpus away
- * from the last online (non boot) pcpu!
- *
- * Therefore, in the shutdown/suspend case, we just pick up
- * one (still) online pcpu. Note that, at this stage, all
- * domains (including dom0) have been paused already, so we
- * do not expect any vcpu activity at all.
- */
- cpumask_andnot(&online_affinity, &cpu_online_map,
- cpumask_of(cpu));
- BUG_ON(cpumask_empty(&online_affinity));
- /*
- * As boot cpu is, usually, pcpu #0, using cpumask_first()
- * will make us converge quicker.
- */
- new_cpu = cpumask_first(&online_affinity);
- vcpu_move_nosched(v, new_cpu);
- }
- else
- {
- /*
- * OTOH, if the system is still live, and we are here because
- * we are doing some cpupool manipulations:
- * * we want to call the scheduler, and let it re-evaluation
- * the placement of the vcpu, taking into account the new
- * cpupool configuration;
- * * the scheduler will always fine a suitable solution, or
- * things would have failed before getting in here.
- */
- vcpu_migrate_start(v);
- vcpu_schedule_unlock_irqrestore(lock, flags, v);
+ /* If it is on this cpu, we must send it away.
+ * We are doing some cpupool manipulations:
+ * * we want to call the scheduler, and let it re-evaluate
+ * the placement of the vcpu, taking into account the new
+ * cpupool configuration;
+ * * the scheduler will always find a suitable solution, or
+ * things would have failed before getting here.
+ */
+ vcpu_migrate_start(v);
+ vcpu_schedule_unlock_irqrestore(lock, flags, v);
- vcpu_migrate_finish(v);
+ vcpu_migrate_finish(v);
- /*
- * The only caveat, in this case, is that if a vcpu active in
- * the hypervisor isn't migratable. In this case, the caller
- * should try again after releasing and reaquiring all locks.
- */
- if ( v->processor == cpu )
- ret = -EAGAIN;
- }
+ /*
+ * The only caveat, in this case, is that a vcpu active in the
+ * hypervisor may not be migratable. If so, the caller should
+ * try again after releasing and reacquiring all locks.
+ */
+ if ( v->processor == cpu )
+ ret = -EAGAIN;
}
}
@@ -1748,26 +1684,33 @@ static int cpu_schedule_callback(
switch ( action )
{
case CPU_STARTING:
- SCHED_OP(sched, init_pdata, sd->sched_priv, cpu);
+ if ( system_state != SYS_STATE_resume )
+ SCHED_OP(sched, init_pdata, sd->sched_priv, cpu);
break;
case CPU_UP_PREPARE:
- rc = cpu_schedule_up(cpu);
+ if ( system_state != SYS_STATE_resume )
+ rc = cpu_schedule_up(cpu);
break;
case CPU_DOWN_PREPARE:
rcu_read_lock(&domlist_read_lock);
rc = cpu_disable_scheduler_check(cpu);
rcu_read_unlock(&domlist_read_lock);
break;
+ case CPU_RESUME_FAILED:
case CPU_DEAD:
+ if ( system_state == SYS_STATE_suspend )
+ break;
rcu_read_lock(&domlist_read_lock);
rc = cpu_disable_scheduler(cpu);
BUG_ON(rc);
rcu_read_unlock(&domlist_read_lock);
SCHED_OP(sched, deinit_pdata, sd->sched_priv, cpu);
- /* Fallthrough */
- case CPU_UP_CANCELED:
cpu_schedule_down(cpu);
break;
+ case CPU_UP_CANCELED:
+ if ( system_state != SYS_STATE_resume )
+ cpu_schedule_down(cpu);
+ break;
default:
break;
}