@@ -1171,7 +1171,10 @@ int arch_set_info_guest(
out:
if ( flags & VGCF_online )
+ {
+ v->reload_context = true;
clear_bit(_VPF_down, &v->pause_flags);
+ }
else
set_bit(_VPF_down, &v->pause_flags);
return 0;
@@ -1663,6 +1666,24 @@ static inline void load_default_gdt(seg_desc_t *gdt, unsigned int cpu)
per_cpu(full_gdt_loaded, cpu) = false;
}
+static inline void csw_load_regs(struct vcpu *v,
+ struct cpu_user_regs *stack_regs)
+{
+ memcpy(stack_regs, &v->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
+ if ( cpu_has_xsave )
+ {
+ u64 xcr0 = v->arch.xcr0 ?: XSTATE_FP_SSE;
+
+ if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
+ BUG();
+
+ if ( cpu_has_xsaves && is_hvm_vcpu(v) )
+ set_msr_xss(v->arch.hvm.msr_xss);
+ }
+ vcpu_restore_fpu_nonlazy(v, false);
+ v->domain->arch.ctxt_switch->to(v);
+}
+
static void __context_switch(void)
{
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
@@ -1676,7 +1697,7 @@ static void __context_switch(void)
ASSERT(p != n);
ASSERT(!vcpu_cpu_dirty(n));
- if ( !is_idle_domain(pd) )
+ if ( !is_idle_domain(pd) && is_vcpu_online(p) && !p->reload_context )
{
memcpy(&p->arch.user_regs, stack_regs, CTXT_SWITCH_STACK_BYTES);
vcpu_save_fpu(p);
@@ -1692,22 +1713,8 @@ static void __context_switch(void)
cpumask_set_cpu(cpu, nd->dirty_cpumask);
write_atomic(&n->dirty_cpu, cpu);
- if ( !is_idle_domain(nd) )
- {
- memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
- if ( cpu_has_xsave )
- {
- u64 xcr0 = n->arch.xcr0 ?: XSTATE_FP_SSE;
-
- if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
- BUG();
-
- if ( cpu_has_xsaves && is_hvm_vcpu(n) )
- set_msr_xss(n->arch.hvm.msr_xss);
- }
- vcpu_restore_fpu_nonlazy(n, false);
- nd->arch.ctxt_switch->to(n);
- }
+ if ( !is_idle_domain(nd) && is_vcpu_online(n) )
+ csw_load_regs(n, stack_regs);
psr_ctxt_switch_to(nd);
@@ -1775,6 +1782,72 @@ static void context_wait_rendezvous_out(struct sched_item *item,
context_saved(prev);
}
+static void __continue_running(struct vcpu *same)
+{
+ struct domain *d = same->domain;
+ seg_desc_t *gdt;
+ bool full_gdt = need_full_gdt(d);
+ unsigned int cpu = smp_processor_id();
+
+ gdt = !is_pv_32bit_domain(d) ? per_cpu(gdt_table, cpu) :
+ per_cpu(compat_gdt_table, cpu);
+
+ if ( same->reload_context )
+ {
+ struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
+
+ get_cpu_info()->use_pv_cr3 = false;
+ get_cpu_info()->xen_cr3 = 0;
+
+ local_irq_disable();
+
+ csw_load_regs(same, stack_regs);
+
+ psr_ctxt_switch_to(d);
+
+ if ( full_gdt )
+ write_full_gdt_ptes(gdt, same);
+
+ write_ptbase(same);
+
+#if defined(CONFIG_PV) && defined(CONFIG_HVM)
+ /* Prefetch the VMCB if we expect to use it later in context switch */
+ if ( cpu_has_svm && is_pv_domain(d) && !is_pv_32bit_domain(d) &&
+ !(read_cr4() & X86_CR4_FSGSBASE) )
+ svm_load_segs(0, 0, 0, 0, 0, 0, 0);
+#endif
+
+ if ( full_gdt )
+ load_full_gdt(same, cpu);
+
+ local_irq_enable();
+
+ if ( is_pv_domain(d) )
+ load_segments(same);
+
+ same->reload_context = false;
+
+ _update_runstate_area(same);
+
+ update_vcpu_system_time(same);
+ }
+ else if ( !is_idle_vcpu(same) && full_gdt != per_cpu(full_gdt_loaded, cpu) )
+ {
+ local_irq_disable();
+
+ if ( full_gdt )
+ {
+ write_full_gdt_ptes(gdt, same);
+ write_ptbase(same);
+ load_full_gdt(same, cpu);
+ }
+ else
+ load_default_gdt(gdt, cpu);
+
+ local_irq_enable();
+ }
+}
+
void context_switch(struct vcpu *prev, struct vcpu *next)
{
unsigned int cpu = smp_processor_id();
@@ -1811,6 +1884,9 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
(is_idle_domain(nextd) && cpu_online(cpu)) )
{
local_irq_enable();
+
+ if ( !is_idle_domain(nextd) )
+ __continue_running(next);
}
else
{
@@ -1822,6 +1898,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
if ( is_pv_domain(nextd) )
load_segments(next);
+ next->reload_context = false;
+
ctxt_switch_levelling(next);
if ( opt_ibpb && !is_idle_domain(nextd) )
@@ -1886,6 +1964,8 @@ void continue_running(struct vcpu *same)
if ( !vcpu_runnable(same) )
sched_vcpu_idle(same);
+ __continue_running(same);
+
/* See the comment above. */
same->domain->arch.ctxt_switch->tail(same);
BUG();
@@ -1133,6 +1133,7 @@ static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
/* Auxiliary processors should be woken immediately. */
v->is_initialised = 1;
+ v->reload_context = true;
clear_bit(_VPF_down, &v->pause_flags);
vcpu_wake(v);
@@ -3913,6 +3914,7 @@ void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip)
v->arch.flags |= TF_kernel_mode;
v->is_initialised = 1;
+ v->reload_context = true;
clear_bit(_VPF_down, &v->pause_flags);
out:
@@ -367,6 +367,7 @@ static void vlapic_accept_irq(struct vcpu *v, uint32_t icr_low)
domain_lock(v->domain);
if ( v->is_initialised )
wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
+ v->reload_context = wake;
domain_unlock(v->domain);
if ( wake )
vcpu_wake(v);
@@ -1383,6 +1383,8 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
rc = -EINVAL;
else
wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
+ if ( wake )
+ v->reload_context = true;
domain_unlock(d);
if ( wake )
vcpu_wake(v);
@@ -1775,17 +1775,22 @@ static struct sched_item *sched_wait_rendezvous_in(struct sched_item *prev,
{
next = do_schedule(prev, now);
atomic_set(&next->rendezvous_out_cnt, sched_granularity + 1);
- return next;
}
-
- while ( prev->rendezvous_in_cnt )
+ else
{
- pcpu_schedule_unlock_irq(lock, cpu);
- cpu_relax();
- pcpu_schedule_lock_irq(cpu);
+ while ( prev->rendezvous_in_cnt )
+ {
+ pcpu_schedule_unlock_irq(lock, cpu);
+ cpu_relax();
+ pcpu_schedule_lock_irq(cpu);
+ }
+ next = prev->next_task;
}
- return prev->next_task;
+ if ( unlikely(prev == next) )
+ vcpu_runstate_helper(current, RUNSTATE_running, now);
+
+ return next;
}
static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
@@ -206,6 +206,9 @@ struct vcpu
bool hcall_compat;
#endif
+ /* VCPU was down before (context might need to be reloaded). */
+ bool reload_context;
+
/* The CPU, if any, which is holding onto this VCPU's state. */
#define VCPU_CPU_CLEAN (~0u)
unsigned int dirty_cpu;
For core scheduling a transition from an offline vcpu to a running one
must be special cased: the vcpu might be in guest idle, but the context
has to be loaded as if a context switch were being done. For that
purpose add a flag to the vcpu structure which indicates that
condition. That flag is tested in continue_running(), and if it is set
the context is loaded if required.

Carve out some context loading functionality from __context_switch()
into a new function, as we need it in continue_running() now, too.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/arch/x86/domain.c     | 114 +++++++++++++++++++++++++++++++++++++++-------
 xen/arch/x86/hvm/hvm.c    |   2 +
 xen/arch/x86/hvm/vlapic.c |   1 +
 xen/common/domain.c       |   2 +
 xen/common/schedule.c     |  19 +++---
 xen/include/xen/sched.h   |   3 ++
 6 files changed, 117 insertions(+), 24 deletions(-)
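
For readers following the flow, here is a minimal standalone sketch of the
producer/consumer relationship the patch establishes: bringing a vcpu online
marks its context as stale via reload_context, and the continue_running()
path then reloads the context exactly once. All types and helpers below are
simplified stand-ins for illustration only, not the real Xen structures or
interfaces; the actual patch additionally handles GDT, xstate, page table
and segment reloads inside __continue_running().

/*
 * Standalone sketch (simplified stand-in types, not the actual Xen
 * interfaces) of how reload_context is produced and consumed.
 */
#include <stdbool.h>
#include <stdio.h>

struct vcpu {
    bool is_down;          /* stand-in for _VPF_down in pause_flags */
    bool reload_context;   /* set when the vcpu is brought online again */
};

/* Producer side: bringing a vcpu online marks its context as stale. */
static void vcpu_bring_online(struct vcpu *v)
{
    if ( v->is_down )
    {
        v->reload_context = true;
        v->is_down = false;
    }
}

/* Stand-in for csw_load_regs() plus GDT/page-table/segment reloads. */
static void load_guest_context(struct vcpu *v)
{
    printf("loading guest context\n");
}

/*
 * Consumer side: roughly mirrors the reload_context branch added to
 * __continue_running() - the same vcpu keeps running, but its context
 * has to be (re)loaded once after coming online.
 */
static void continue_running_sketch(struct vcpu *v)
{
    if ( v->reload_context )
    {
        load_guest_context(v);
        v->reload_context = false;
    }
    /* Otherwise nothing to do: the context is still loaded. */
}

int main(void)
{
    struct vcpu v = { .is_down = true };

    vcpu_bring_online(&v);
    continue_running_sketch(&v);   /* reloads once */
    continue_running_sketch(&v);   /* no further reload */
    return 0;
}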