@@ -80,7 +80,7 @@ unsigned int __init dom0_max_vcpus(void)
struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
{
- return vcpu_create(dom0, 0, 0);
+ return vcpu_create(dom0, 0);
}
static unsigned int __init get_allocation_size(paddr_t size)
@@ -1940,7 +1940,7 @@ static void __init find_gnttab_region(struct domain *d,
static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
{
- int i, cpu;
+ int i;
struct vcpu *v = d->vcpu[0];
struct cpu_user_regs *regs = &v->arch.cpu_info->guest_cpu_user_regs;
@@ -2003,12 +2003,11 @@ static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
}
#endif
- for ( i = 1, cpu = 0; i < d->max_vcpus; i++ )
+ for ( i = 1; i < d->max_vcpus; i++ )
{
- cpu = cpumask_cycle(cpu, &cpu_online_map);
- if ( vcpu_create(d, i, cpu) == NULL )
+ if ( vcpu_create(d, i) == NULL )
{
- printk("Failed to allocate dom0 vcpu %d on pcpu %d\n", i, cpu);
+ printk("Failed to allocate d0v%u\n", i);
break;
}
@@ -2043,7 +2042,7 @@ static int __init construct_domU(struct domain *d,
kinfo.vpl011 = dt_property_read_bool(node, "vpl011");
- if ( vcpu_create(d, 0, 0) == NULL )
+ if ( vcpu_create(d, 0) == NULL )
return -ENOMEM;
d->max_pages = ~0U;
@@ -165,7 +165,7 @@ custom_param("dom0_max_vcpus", parse_dom0_max_vcpus);
static __initdata unsigned int dom0_nr_pxms;
static __initdata unsigned int dom0_pxms[MAX_NUMNODES] =
{ [0 ... MAX_NUMNODES - 1] = ~0 };
-static __initdata bool dom0_affinity_relaxed;
+__initdata bool dom0_affinity_relaxed;
static int __init parse_dom0_nodes(const char *s)
{
@@ -196,32 +196,7 @@ static int __init parse_dom0_nodes(const char *s)
}
custom_param("dom0_nodes", parse_dom0_nodes);
-static cpumask_t __initdata dom0_cpus;
-
-struct vcpu *__init dom0_setup_vcpu(struct domain *d,
- unsigned int vcpu_id,
- unsigned int prev_cpu)
-{
- unsigned int cpu = cpumask_cycle(prev_cpu, &dom0_cpus);
- struct vcpu *v = vcpu_create(d, vcpu_id, cpu);
-
- if ( v )
- {
- if ( pv_shim )
- {
- sched_set_affinity(v, cpumask_of(vcpu_id), cpumask_of(vcpu_id));
- }
- else
- {
- if ( !opt_dom0_vcpus_pin && !dom0_affinity_relaxed )
- sched_set_affinity(v, &dom0_cpus, NULL);
- sched_set_affinity(v, NULL, &dom0_cpus);
- }
- }
-
- return v;
-}
-
+cpumask_t __initdata dom0_cpus;
static nodemask_t __initdata dom0_nodes;
unsigned int __init dom0_max_vcpus(void)
@@ -273,8 +248,7 @@ struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
dom0->node_affinity = dom0_nodes;
dom0->auto_node_affinity = !dom0_nr_pxms;
- return dom0_setup_vcpu(dom0, 0,
- cpumask_last(&dom0_cpus) /* so it wraps around to first pcpu */);
+ return vcpu_create(dom0, 0);
}
#ifdef CONFIG_SHADOW_PAGING
@@ -614,7 +614,6 @@ static int __init pvh_setup_cpus(struct domain *d, paddr_t entry,
paddr_t start_info)
{
struct vcpu *v = d->vcpu[0];
- unsigned int cpu = v->processor, i;
int rc;
/*
* This sets the vCPU state according to the state described in
@@ -635,15 +634,7 @@ static int __init pvh_setup_cpus(struct domain *d, paddr_t entry,
.cpu_regs.x86_32.tr_ar = 0x8b,
};
- for ( i = 1; i < d->max_vcpus; i++ )
- {
- const struct vcpu *p = dom0_setup_vcpu(d, i, cpu);
-
- if ( p )
- cpu = p->processor;
- }
-
- domain_update_node_affinity(d);
+ sched_setup_dom0_vcpus(d);
rc = arch_set_info_hvm_guest(v, &cpu_ctx);
if ( rc )
@@ -285,7 +285,7 @@ int __init dom0_construct_pv(struct domain *d,
module_t *initrd,
char *cmdline)
{
- int i, cpu, rc, compatible, order, machine;
+ int i, rc, compatible, order, machine;
struct cpu_user_regs *regs;
unsigned long pfn, mfn;
unsigned long nr_pages;
@@ -694,16 +694,8 @@ int __init dom0_construct_pv(struct domain *d,
printk("Dom%u has maximum %u VCPUs\n", d->domain_id, d->max_vcpus);
- cpu = v->processor;
- for ( i = 1; i < d->max_vcpus; i++ )
- {
- const struct vcpu *p = dom0_setup_vcpu(d, i, cpu);
-
- if ( p )
- cpu = p->processor;
- }
+ sched_setup_dom0_vcpus(d);
- domain_update_node_affinity(d);
d->arch.paging.mode = 0;
/* Set up CR3 value for write_ptbase */
@@ -135,8 +135,7 @@ static void vcpu_destroy(struct vcpu *v)
free_vcpu_struct(v);
}
-struct vcpu *vcpu_create(
- struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
+struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
{
struct vcpu *v;
@@ -168,7 +167,7 @@ struct vcpu *vcpu_create(
init_waitqueue_vcpu(v);
}
- if ( sched_init_vcpu(v, cpu_id) != 0 )
+ if ( sched_init_vcpu(v) != 0 )
goto fail_wq;
if ( arch_vcpu_create(v) != 0 )
@@ -532,8 +532,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
case XEN_DOMCTL_max_vcpus:
{
- unsigned int i, max = op->u.max_vcpus.max, cpu;
- cpumask_t *online;
+ unsigned int i, max = op->u.max_vcpus.max;
ret = -EINVAL;
if ( (d == current->domain) || /* no domain_pause() */
@@ -544,18 +543,13 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
domain_pause(d);
ret = -ENOMEM;
- online = cpupool_domain_cpumask(d);
for ( i = 0; i < max; i++ )
{
if ( d->vcpu[i] != NULL )
continue;
- cpu = (i == 0) ?
- cpumask_any(online) :
- cpumask_cycle(d->vcpu[i-1]->processor, online);
-
- if ( vcpu_create(d, i, cpu) == NULL )
+ if ( vcpu_create(d, i) == NULL )
goto maxvcpu_out;
}
@@ -38,6 +38,10 @@
#include <xsm/xsm.h>
#include <xen/err.h>
+#ifdef CONFIG_X86
+#include <asm/guest.h>
+#endif
+
/* opt_sched: scheduler - default to configured value */
static char __initdata opt_sched[10] = CONFIG_SCHED_DEFAULT;
string_param("sched", opt_sched);
@@ -80,6 +84,9 @@ extern const struct scheduler *__start_schedulers_array[], *__end_schedulers_arr
static struct scheduler __read_mostly ops;
+static void sched_set_affinity(
+ struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
+
static spinlock_t *
sched_idle_switch_sched(struct scheduler *new_ops, unsigned int cpu,
void *pdata, void *vdata)
@@ -372,14 +379,52 @@ static struct sched_unit *sched_alloc_unit(struct vcpu *v)
return NULL;
}
-int sched_init_vcpu(struct vcpu *v, unsigned int processor)
+static unsigned int sched_select_initial_cpu(const struct vcpu *v)
+{
+ const struct domain *d = v->domain;
+ nodeid_t node;
+ spinlock_t *lock;
+ unsigned long flags;
+ unsigned int cpu_ret, cpu = smp_processor_id();
+ cpumask_t *cpus = cpumask_scratch_cpu(cpu);
+
+ lock = pcpu_schedule_lock_irqsave(cpu, &flags);
+ cpumask_clear(cpus);
+ for_each_node_mask ( node, d->node_affinity )
+ cpumask_or(cpus, cpus, &node_to_cpumask(node));
+ cpumask_and(cpus, cpus, cpupool_domain_cpumask(d));
+ if ( cpumask_empty(cpus) )
+ cpumask_copy(cpus, cpupool_domain_cpumask(d));
+
+ if ( v->vcpu_id == 0 )
+ cpu_ret = cpumask_first(cpus);
+ else
+ {
+ /* We can rely on previous vcpu being available. */
+ ASSERT(!is_idle_domain(d));
+
+ cpu_ret = cpumask_cycle(d->vcpu[v->vcpu_id - 1]->processor, cpus);
+ }
+
+ pcpu_schedule_unlock_irqrestore(lock, flags, cpu);
+
+ return cpu_ret;
+}
+
+int sched_init_vcpu(struct vcpu *v)
{
struct domain *d = v->domain;
struct sched_unit *unit;
+ unsigned int processor;
if ( (unit = sched_alloc_unit(v)) == NULL )
return 1;
+ if ( is_idle_domain(d) )
+ processor = v->vcpu_id;
+ else
+ processor = sched_select_initial_cpu(v);
+
sched_set_res(unit, get_sched_res(processor));
/* Initialise the per-vcpu timers. */
@@ -403,9 +448,9 @@ int sched_init_vcpu(struct vcpu *v, unsigned int processor)
* domain-0 VCPUs, are pinned onto their respective physical CPUs.
*/
if ( is_idle_domain(d) || (is_hardware_domain(d) && opt_dom0_vcpus_pin) )
- sched_set_affinity(v, cpumask_of(processor), &cpumask_all);
+ sched_set_affinity(unit, cpumask_of(processor), &cpumask_all);
else
- sched_set_affinity(v, &cpumask_all, &cpumask_all);
+ sched_set_affinity(unit, &cpumask_all, &cpumask_all);
/* Idle VCPUs are scheduled immediately, so don't put them in runqueue. */
if ( is_idle_domain(d) )
@@ -496,7 +541,7 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
lock = unit_schedule_lock_irq(v->sched_unit);
- sched_set_affinity(v, &cpumask_all, &cpumask_all);
+ sched_set_affinity(v->sched_unit, &cpumask_all, &cpumask_all);
sched_set_res(v->sched_unit, get_sched_res(new_p));
/*
@@ -839,7 +884,7 @@ void restore_vcpu_affinity(struct domain *d)
{
if ( v->affinity_broken )
{
- sched_set_affinity(v, unit->cpu_hard_affinity_saved, NULL);
+ sched_set_affinity(unit, unit->cpu_hard_affinity_saved, NULL);
v->affinity_broken = 0;
cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
cpupool_domain_cpumask(d));
@@ -848,7 +893,7 @@ void restore_vcpu_affinity(struct domain *d)
if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
{
printk(XENLOG_DEBUG "Breaking affinity for %pv\n", v);
- sched_set_affinity(v, &cpumask_all, NULL);
+ sched_set_affinity(unit, &cpumask_all, NULL);
cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
cpupool_domain_cpumask(d));
}
@@ -911,7 +956,7 @@ int cpu_disable_scheduler(unsigned int cpu)
printk(XENLOG_DEBUG "Breaking affinity for %pv\n", v);
- sched_set_affinity(v, &cpumask_all, NULL);
+ sched_set_affinity(unit, &cpumask_all, NULL);
}
if ( v->processor != cpu )
@@ -970,14 +1015,12 @@ static int cpu_disable_scheduler_check(unsigned int cpu)
* adjust_affinity hook may want to modify the vCPU state. However, when the
* vCPU is being initialized (either for dom0 or domU) there is no risk of
* races, and it's fine to not take the look (we're talking about
- * dom0_setup_vcpu() an sched_init_vcpu()).
+ * sched_setup_dom0_vcpus() and sched_init_vcpu()).
*/
-void sched_set_affinity(
- struct vcpu *v, const cpumask_t *hard, const cpumask_t *soft)
+static void sched_set_affinity(
+ struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft)
{
- struct sched_unit *unit = v->sched_unit;
-
- sched_adjust_affinity(dom_scheduler(v->domain), unit, hard, soft);
+ sched_adjust_affinity(dom_scheduler(unit->domain), unit, hard, soft);
if ( hard )
cpumask_copy(unit->cpu_hard_affinity, hard);
@@ -1009,12 +1052,12 @@ static int vcpu_set_affinity(
*/
if ( which == unit->cpu_hard_affinity )
{
- sched_set_affinity(v, affinity, NULL);
+ sched_set_affinity(unit, affinity, NULL);
}
else
{
ASSERT(which == unit->cpu_soft_affinity);
- sched_set_affinity(v, NULL, affinity);
+ sched_set_affinity(unit, NULL, affinity);
}
vcpu_migrate_start(v);
}
@@ -1262,7 +1305,7 @@ int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason)
v->affinity_broken &= ~reason;
}
if ( !ret && !v->affinity_broken )
- sched_set_affinity(v, unit->cpu_hard_affinity_saved, NULL);
+ sched_set_affinity(unit, unit->cpu_hard_affinity_saved, NULL);
}
else if ( cpu < nr_cpu_ids )
{
@@ -1275,7 +1318,7 @@ int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason)
{
cpumask_copy(unit->cpu_hard_affinity_saved,
unit->cpu_hard_affinity);
- sched_set_affinity(v, cpumask_of(cpu), NULL);
+ sched_set_affinity(unit, cpumask_of(cpu), NULL);
}
v->affinity_broken |= reason;
ret = 0;
@@ -1764,7 +1807,7 @@ static int cpu_schedule_up(unsigned int cpu)
return 0;
if ( idle_vcpu[cpu] == NULL )
- vcpu_create(idle_vcpu[0]->domain, cpu, cpu);
+ vcpu_create(idle_vcpu[0]->domain, cpu);
else
idle_vcpu[cpu]->sched_unit->res = sr;
@@ -1943,7 +1986,7 @@ void __init scheduler_init(void)
BUG_ON(nr_cpu_ids > ARRAY_SIZE(idle_vcpu));
idle_domain->vcpu = idle_vcpu;
idle_domain->max_vcpus = nr_cpu_ids;
- if ( vcpu_create(idle_domain, 0, 0) == NULL )
+ if ( vcpu_create(idle_domain, 0) == NULL )
BUG();
get_sched_res(0)->curr = idle_vcpu[0]->sched_unit;
}
@@ -2145,6 +2188,35 @@ void wait(void)
schedule();
}
+#ifdef CONFIG_X86
+void __init sched_setup_dom0_vcpus(struct domain *d)
+{
+ unsigned int i;
+ struct sched_unit *unit;
+
+ for ( i = 1; i < d->max_vcpus; i++ )
+ vcpu_create(d, i);
+
+ for_each_sched_unit ( d, unit )
+ {
+ unsigned int id = unit->unit_id;
+
+ if ( pv_shim )
+ {
+ sched_set_affinity(unit, cpumask_of(id), cpumask_of(id));
+ }
+ else
+ {
+ if ( !opt_dom0_vcpus_pin && !dom0_affinity_relaxed )
+ sched_set_affinity(unit, &dom0_cpus, NULL);
+ sched_set_affinity(unit, NULL, &dom0_cpus);
+ }
+ }
+
+ domain_update_node_affinity(d);
+}
+#endif
+
#ifdef CONFIG_COMPAT
#include "compat/schedule.c"
#endif
@@ -11,8 +11,6 @@ extern unsigned int dom0_memflags;
unsigned long dom0_compute_nr_pages(struct domain *d,
struct elf_dom_parms *parms,
unsigned long initrd_len);
-struct vcpu *dom0_setup_vcpu(struct domain *d, unsigned int vcpu_id,
- unsigned int cpu);
int dom0_setup_permissions(struct domain *d);
int dom0_construct_pv(struct domain *d, const module_t *image,
@@ -13,8 +13,7 @@ typedef union {
struct compat_vcpu_guest_context *cmp;
} vcpu_guest_context_u __attribute__((__transparent_union__));
-struct vcpu *vcpu_create(
- struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
+struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id);
unsigned int dom0_max_vcpus(void);
struct vcpu *alloc_dom0_vcpu0(struct domain *dom0);
@@ -107,6 +106,8 @@ int continue_hypercall_on_cpu(
extern unsigned int xen_processor_pmbits;
extern bool_t opt_dom0_vcpus_pin;
+extern cpumask_t dom0_cpus;
+extern bool dom0_affinity_relaxed;
/* vnuma topology per domain. */
struct vnuma_info {
@@ -498,7 +498,7 @@ struct cpupool
#define cpupool_online_cpumask(_pool) \
(((_pool) == NULL) ? &cpu_online_map : (_pool)->cpu_valid)
-static inline cpumask_t* cpupool_domain_cpumask(struct domain *d)
+static inline cpumask_t *cpupool_domain_cpumask(const struct domain *d)
{
/*
* d->cpupool is NULL only for the idle domain, and no one should
@@ -661,7 +661,7 @@ void __domain_crash(struct domain *d);
void noreturn asm_domain_crash_synchronous(unsigned long addr);
void scheduler_init(void);
-int sched_init_vcpu(struct vcpu *v, unsigned int processor);
+int sched_init_vcpu(struct vcpu *v);
void sched_destroy_vcpu(struct vcpu *v);
int sched_init_domain(struct domain *d, int poolid);
void sched_destroy_domain(struct domain *d);
@@ -906,9 +906,7 @@ void scheduler_free(struct scheduler *sched);
int schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_set_periodic_timer(struct vcpu *v, s_time_t value);
int cpu_disable_scheduler(unsigned int cpu);
-/* We need it in dom0_setup_vcpu */
-void sched_set_affinity(struct vcpu *v, const cpumask_t *hard,
- const cpumask_t *soft);
+void sched_setup_dom0_vcpus(struct domain *d);
int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason);
int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);