| Message ID | 20190506065644.7415-25-jgross@suse.com (mailing list archive) |
|---|---|
| State | Superseded |
| Series | xen: add core scheduling support |
>>> On 06.05.19 at 08:56, <jgross@suse.com> wrote:
> --- a/xen/common/schedule.c
> +++ b/xen/common/schedule.c
> @@ -314,14 +314,42 @@ static struct sched_item *sched_alloc_item(struct vcpu *v)
>      return NULL;
>  }
>
> -int sched_init_vcpu(struct vcpu *v, unsigned int processor)
> +static unsigned int sched_select_initial_cpu(struct vcpu *v)
> +{
> +    struct domain *d = v->domain;

const (perhaps also the function parameter)?

> +    nodeid_t node;
> +    cpumask_t cpus;

To be honest, I'm not happy to see new on-stack instances of
cpumask_t appear. Seeing ...

> +    cpumask_clear(&cpus);
> +    for_each_node_mask ( node, d->node_affinity )
> +        cpumask_or(&cpus, &cpus, &node_to_cpumask(node));
> +    cpumask_and(&cpus, &cpus, cpupool_domain_cpumask(d));
> +    if ( cpumask_empty(&cpus) )
> +        cpumask_copy(&cpus, cpupool_domain_cpumask(d));

... this fallback you use anyway, is there any issue with it also
serving the case where zalloc_cpumask_var() fails?

[...]
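A minimal sketch of the variant Jan asks about: allocate the mask with
zalloc_cpumask_var() and let the cpupool fallback also absorb allocation
failure. It reuses the helpers from the patch, but the failure-path
behaviour shown is an assumption for illustration, not what the series
ultimately adopted.

/*
 * Illustrative sketch: sched_select_initial_cpu() with the scratch mask
 * allocated rather than placed on the stack.  On allocation failure,
 * fall back to the plain cpupool mask, mirroring the existing
 * empty-intersection fallback.
 */
static unsigned int sched_select_initial_cpu(const struct vcpu *v)
{
    const struct domain *d = v->domain;
    nodeid_t node;
    unsigned int cpu;
    cpumask_var_t cpus;

    if ( !zalloc_cpumask_var(&cpus) )
        /* Assumed fallback: behave as if node affinity yielded nothing. */
        return v->vcpu_id
               ? cpumask_cycle(d->vcpu[v->vcpu_id - 1]->processor,
                               cpupool_domain_cpumask(d))
               : cpumask_first(cpupool_domain_cpumask(d));

    /* zalloc_cpumask_var() zeroes the mask, so no cpumask_clear() here. */
    for_each_node_mask ( node, d->node_affinity )
        cpumask_or(cpus, cpus, &node_to_cpumask(node));
    cpumask_and(cpus, cpus, cpupool_domain_cpumask(d));
    if ( cpumask_empty(cpus) )
        cpumask_copy(cpus, cpupool_domain_cpumask(d));

    cpu = v->vcpu_id
          ? cpumask_cycle(d->vcpu[v->vcpu_id - 1]->processor, cpus)
          : cpumask_first(cpus);

    free_cpumask_var(cpus);

    return cpu;
}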
On 16/05/2019 14:20, Jan Beulich wrote:
>>>> On 06.05.19 at 08:56, <jgross@suse.com> wrote:
>> --- a/xen/common/schedule.c
>> +++ b/xen/common/schedule.c
>> @@ -314,14 +314,42 @@ static struct sched_item *sched_alloc_item(struct vcpu *v)
>>      return NULL;
>>  }
>>
>> -int sched_init_vcpu(struct vcpu *v, unsigned int processor)
>> +static unsigned int sched_select_initial_cpu(struct vcpu *v)
>> +{
>> +    struct domain *d = v->domain;
>
> const (perhaps also the function parameter)?

Yes.

>> +    nodeid_t node;
>> +    cpumask_t cpus;
>
> To be honest, I'm not happy to see new on-stack instances of
> cpumask_t appear. Seeing ...
>
>> +    cpumask_clear(&cpus);
>> +    for_each_node_mask ( node, d->node_affinity )
>> +        cpumask_or(&cpus, &cpus, &node_to_cpumask(node));
>> +    cpumask_and(&cpus, &cpus, cpupool_domain_cpumask(d));
>> +    if ( cpumask_empty(&cpus) )
>> +        cpumask_copy(&cpus, cpupool_domain_cpumask(d));
>
> ... this fallback you use anyway, is there any issue with it also
> serving the case where zalloc_cpumask_var() fails?

Either that, or:

- just fail to create the vcpu in that case, as chances are rather
  high e.g. the following arch_vcpu_create() will fail anyway
- take the scheduling lock and use cpumask_scratch
- (ab)use one of the available cpumasks in struct sched_unit which
  are not in use yet

My preference would be using cpumask_scratch.


Juergen
>>> On 16.05.19 at 14:46, <jgross@suse.com> wrote:
> On 16/05/2019 14:20, Jan Beulich wrote:
>>>>> On 06.05.19 at 08:56, <jgross@suse.com> wrote:
>>> --- a/xen/common/schedule.c
>>> +++ b/xen/common/schedule.c
>>> @@ -314,14 +314,42 @@ static struct sched_item *sched_alloc_item(struct vcpu *v)
>>>      return NULL;
>>>  }
>>>
>>> -int sched_init_vcpu(struct vcpu *v, unsigned int processor)
>>> +static unsigned int sched_select_initial_cpu(struct vcpu *v)
>>> +{
>>> +    struct domain *d = v->domain;
>>> +    nodeid_t node;
>>> +    cpumask_t cpus;
>>
>> To be honest, I'm not happy to see new on-stack instances of
>> cpumask_t appear. Seeing ...
>>
>>> +    cpumask_clear(&cpus);
>>> +    for_each_node_mask ( node, d->node_affinity )
>>> +        cpumask_or(&cpus, &cpus, &node_to_cpumask(node));
>>> +    cpumask_and(&cpus, &cpus, cpupool_domain_cpumask(d));
>>> +    if ( cpumask_empty(&cpus) )
>>> +        cpumask_copy(&cpus, cpupool_domain_cpumask(d));
>>
>> ... this fallback you use anyway, is there any issue with it also
>> serving the case where zalloc_cpumask_var() fails?
>
> Either that, or:
>
> - just fail to create the vcpu in that case, as chances are rather
>   high e.g. the following arch_vcpu_create() will fail anyway

Ah, right, this is for vCPU creation only anyway.

> - take the scheduling lock and use cpumask_scratch
> - (ab)use one of the available cpumasks in struct sched_unit which
>   are not in use yet
>
> My preference would be using cpumask_scratch.

I'm actually fine with any of the variants, including that of
simply returning -ENOMEM.

Jan
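For comparison, the cpumask_scratch route Juergen prefers could look
roughly as below. This is a sketch under the assumption that the per-cpu
cpumask_scratch may only be used while holding that pCPU's scheduler
lock, hence the pcpu_schedule_lock_irqsave() pair around the mask
manipulation.

/*
 * Sketch of sched_select_initial_cpu() using the scratch mask of the
 * current pCPU instead of an on-stack cpumask_t.  Holding that pCPU's
 * scheduler lock is what makes cpumask_scratch safe to use here.
 */
static unsigned int sched_select_initial_cpu(const struct vcpu *v)
{
    const struct domain *d = v->domain;
    nodeid_t node;
    spinlock_t *lock;
    unsigned long flags;
    unsigned int cpu_ret, cpu = smp_processor_id();
    cpumask_t *cpus = cpumask_scratch_cpu(cpu);

    lock = pcpu_schedule_lock_irqsave(cpu, &flags);

    cpumask_clear(cpus);
    for_each_node_mask ( node, d->node_affinity )
        cpumask_or(cpus, cpus, &node_to_cpumask(node));
    cpumask_and(cpus, cpus, cpupool_domain_cpumask(d));
    if ( cpumask_empty(cpus) )
        cpumask_copy(cpus, cpupool_domain_cpumask(d));

    if ( v->vcpu_id == 0 )
        cpu_ret = cpumask_first(cpus);
    else
        /* We can rely on the previous vcpu being available. */
        cpu_ret = cpumask_cycle(d->vcpu[v->vcpu_id - 1]->processor, cpus);

    pcpu_schedule_unlock_irqrestore(lock, flags, cpu);

    return cpu_ret;
}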
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index d9836779d1..86a6e4bf7b 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -80,7 +80,7 @@ unsigned int __init dom0_max_vcpus(void)
 
 struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
 {
-    return vcpu_create(dom0, 0, 0);
+    return vcpu_create(dom0, 0);
 }
 
 static unsigned int __init get_allocation_size(paddr_t size)
@@ -1923,7 +1923,7 @@ static void __init find_gnttab_region(struct domain *d,
 static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
 {
-    int i, cpu;
+    int i;
     struct vcpu *v = d->vcpu[0];
     struct cpu_user_regs *regs = &v->arch.cpu_info->guest_cpu_user_regs;
@@ -1986,12 +1986,11 @@ static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
     }
 #endif
 
-    for ( i = 1, cpu = 0; i < d->max_vcpus; i++ )
+    for ( i = 1; i < d->max_vcpus; i++ )
     {
-        cpu = cpumask_cycle(cpu, &cpu_online_map);
-        if ( vcpu_create(d, i, cpu) == NULL )
+        if ( vcpu_create(d, i) == NULL )
         {
-            printk("Failed to allocate dom0 vcpu %d on pcpu %d\n", i, cpu);
+            printk("Failed to allocate d0v%u\n", i);
             break;
         }
 
@@ -2026,7 +2025,7 @@ static int __init construct_domU(struct domain *d,
 
     kinfo.vpl011 = dt_property_read_bool(node, "vpl011");
 
-    if ( vcpu_create(d, 0, 0) == NULL )
+    if ( vcpu_create(d, 0) == NULL )
         return -ENOMEM;
 
     d->max_pages = ~0U;
diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index 73f5407b0d..e550db8b03 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -198,12 +198,9 @@ custom_param("dom0_nodes", parse_dom0_nodes);
 
 static cpumask_t __initdata dom0_cpus;
 
-struct vcpu *__init dom0_setup_vcpu(struct domain *d,
-                                    unsigned int vcpu_id,
-                                    unsigned int prev_cpu)
+struct vcpu *__init dom0_setup_vcpu(struct domain *d, unsigned int vcpu_id)
 {
-    unsigned int cpu = cpumask_cycle(prev_cpu, &dom0_cpus);
-    struct vcpu *v = vcpu_create(d, vcpu_id, cpu);
+    struct vcpu *v = vcpu_create(d, vcpu_id);
 
     if ( v )
     {
@@ -273,8 +270,7 @@ struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
     dom0->node_affinity = dom0_nodes;
     dom0->auto_node_affinity = !dom0_nr_pxms;
 
-    return dom0_setup_vcpu(dom0, 0,
-                           cpumask_last(&dom0_cpus) /* so it wraps around to first pcpu */);
+    return dom0_setup_vcpu(dom0, 0);
 }
 
 #ifdef CONFIG_SHADOW_PAGING
diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
index aa599f09ef..15166bbaa9 100644
--- a/xen/arch/x86/hvm/dom0_build.c
+++ b/xen/arch/x86/hvm/dom0_build.c
@@ -614,7 +614,7 @@ static int __init pvh_setup_cpus(struct domain *d, paddr_t entry,
                                  paddr_t start_info)
 {
     struct vcpu *v = d->vcpu[0];
-    unsigned int cpu = v->processor, i;
+    unsigned int i;
     int rc;
 
     /*
      * This sets the vCPU state according to the state described in
@@ -636,12 +636,7 @@ static int __init pvh_setup_cpus(struct domain *d, paddr_t entry,
     };
 
     for ( i = 1; i < d->max_vcpus; i++ )
-    {
-        const struct vcpu *p = dom0_setup_vcpu(d, i, cpu);
-
-        if ( p )
-            cpu = p->processor;
-    }
+        dom0_setup_vcpu(d, i);
 
     domain_update_node_affinity(d);
 
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index cef2d42254..800b3e6b7d 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -285,7 +285,7 @@ int __init dom0_construct_pv(struct domain *d,
                              module_t *initrd,
                              char *cmdline)
 {
-    int i, cpu, rc, compatible, order, machine;
+    int i, rc, compatible, order, machine;
     struct cpu_user_regs *regs;
     unsigned long pfn, mfn;
     unsigned long nr_pages;
@@ -693,14 +693,8 @@ int __init dom0_construct_pv(struct domain *d,
 
     printk("Dom%u has maximum %u VCPUs\n", d->domain_id, d->max_vcpus);
 
-    cpu = v->processor;
     for ( i = 1; i < d->max_vcpus; i++ )
-    {
-        const struct vcpu *p = dom0_setup_vcpu(d, i, cpu);
-
-        if ( p )
-            cpu = p->processor;
-    }
+        dom0_setup_vcpu(d, i);
 
     domain_update_node_affinity(d);
 
     d->arch.paging.mode = 0;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 1c0abda66f..78a838fab3 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -129,8 +129,7 @@ static void vcpu_destroy(struct vcpu *v)
     free_vcpu_struct(v);
 }
 
-struct vcpu *vcpu_create(
-    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
+struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
 {
     struct vcpu *v;
 
@@ -162,7 +161,7 @@ struct vcpu *vcpu_create(
         init_waitqueue_vcpu(v);
     }
 
-    if ( sched_init_vcpu(v, cpu_id) != 0 )
+    if ( sched_init_vcpu(v) != 0 )
         goto fail_wq;
 
     if ( arch_vcpu_create(v) != 0 )
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 8464713d2b..3f86a336cc 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -540,8 +540,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 
     case XEN_DOMCTL_max_vcpus:
     {
-        unsigned int i, max = op->u.max_vcpus.max, cpu;
-        cpumask_t *online;
+        unsigned int i, max = op->u.max_vcpus.max;
 
         ret = -EINVAL;
         if ( (d == current->domain) || /* no domain_pause() */
@@ -552,18 +551,13 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 
         domain_pause(d);
 
         ret = -ENOMEM;
-        online = cpupool_domain_cpumask(d);
 
         for ( i = 0; i < max; i++ )
         {
             if ( d->vcpu[i] != NULL )
                 continue;
 
-            cpu = (i == 0) ?
-                cpumask_any(online) :
-                cpumask_cycle(d->vcpu[i-1]->processor, online);
-
-            if ( vcpu_create(d, i, cpu) == NULL )
+            if ( vcpu_create(d, i) == NULL )
                 goto maxvcpu_out;
         }
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index dfd261d029..9c54811e86 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -314,14 +314,42 @@ static struct sched_item *sched_alloc_item(struct vcpu *v)
     return NULL;
 }
 
-int sched_init_vcpu(struct vcpu *v, unsigned int processor)
+static unsigned int sched_select_initial_cpu(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    nodeid_t node;
+    cpumask_t cpus;
+
+    cpumask_clear(&cpus);
+    for_each_node_mask ( node, d->node_affinity )
+        cpumask_or(&cpus, &cpus, &node_to_cpumask(node));
+    cpumask_and(&cpus, &cpus, cpupool_domain_cpumask(d));
+    if ( cpumask_empty(&cpus) )
+        cpumask_copy(&cpus, cpupool_domain_cpumask(d));
+
+    if ( v->vcpu_id == 0 )
+        return cpumask_first(&cpus);
+
+    /* We can rely on previous vcpu being available. */
+    ASSERT(!is_idle_domain(d));
+
+    return cpumask_cycle(d->vcpu[v->vcpu_id - 1]->processor, &cpus);
+}
+
+int sched_init_vcpu(struct vcpu *v)
 {
     struct domain *d = v->domain;
     struct sched_item *item;
+    unsigned int processor;
 
     if ( (item = sched_alloc_item(v)) == NULL )
         return 1;
 
+    if ( is_idle_domain(d) )
+        processor = v->vcpu_id;
+    else
+        processor = sched_select_initial_cpu(v);
+
     sched_set_res(item, per_cpu(sched_res, processor));
 
     /* Initialise the per-vcpu timers. */
@@ -1673,7 +1701,7 @@ static int cpu_schedule_up(unsigned int cpu)
         return 0;
 
     if ( idle_vcpu[cpu] == NULL )
-        vcpu_create(idle_vcpu[0]->domain, cpu, cpu);
+        vcpu_create(idle_vcpu[0]->domain, cpu);
     else
     {
         struct vcpu *idle = idle_vcpu[cpu];
@@ -1867,7 +1895,7 @@ void __init scheduler_init(void)
     BUG_ON(nr_cpu_ids > ARRAY_SIZE(idle_vcpu));
     idle_domain->vcpu = idle_vcpu;
     idle_domain->max_vcpus = nr_cpu_ids;
-    if ( vcpu_create(idle_domain, 0, 0) == NULL )
+    if ( vcpu_create(idle_domain, 0) == NULL )
         BUG();
     this_cpu(sched_res)->curr = idle_vcpu[0]->sched_item;
     this_cpu(sched_res)->sched_priv = sched_alloc_pdata(&ops, 0);
diff --git a/xen/include/asm-x86/dom0_build.h b/xen/include/asm-x86/dom0_build.h
index 33a5483739..3eb4b036e1 100644
--- a/xen/include/asm-x86/dom0_build.h
+++ b/xen/include/asm-x86/dom0_build.h
@@ -11,8 +11,7 @@ extern unsigned int dom0_memflags;
 unsigned long dom0_compute_nr_pages(struct domain *d,
                                     struct elf_dom_parms *parms,
                                     unsigned long initrd_len);
-struct vcpu *dom0_setup_vcpu(struct domain *d, unsigned int vcpu_id,
-                             unsigned int cpu);
+struct vcpu *dom0_setup_vcpu(struct domain *d, unsigned int vcpu_id);
 int dom0_setup_permissions(struct domain *d);
 
 int dom0_construct_pv(struct domain *d, const module_t *image,
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index d1bfc82f57..a6e929685c 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -13,8 +13,7 @@ typedef union {
     struct compat_vcpu_guest_context *cmp;
 } vcpu_guest_context_u __attribute__((__transparent_union__));
 
-struct vcpu *vcpu_create(
-    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
+struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id);
 
 unsigned int dom0_max_vcpus(void);
 struct vcpu *alloc_dom0_vcpu0(struct domain *dom0);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index da117365af..8052f98780 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -663,7 +663,7 @@ void __domain_crash(struct domain *d);
 void noreturn asm_domain_crash_synchronous(unsigned long addr);
 
 void scheduler_init(void);
-int sched_init_vcpu(struct vcpu *v, unsigned int processor);
+int sched_init_vcpu(struct vcpu *v);
 void sched_destroy_vcpu(struct vcpu *v);
 int sched_init_domain(struct domain *d, int poolid);
 void sched_destroy_domain(struct domain *d);