Message ID: 20211214123250.88230-2-dwmw2@infradead.org (mailing list archive)
State: New, archived
Series: Parallel CPU bringup for x86_64
On Tue, Dec 14, 2021, David Woodhouse wrote:
> -static int alloc_clustermask(unsigned int cpu, int node)
> +static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
>  {
> +        struct cluster_mask *cmsk = NULL;
> +        unsigned int cpu_i;
> +        u32 apicid;
> +
>          if (per_cpu(cluster_masks, cpu))
>                  return 0;
> -        /*
> -         * If a hotplug spare mask exists, check whether it's on the right
> -         * node. If not, free it and allocate a new one.
> +
> +        /* For the hotplug case, don't always allocate a new one */
> +        for_each_present_cpu(cpu_i) {
> +                apicid = apic->cpu_present_to_apicid(cpu_i);
> +                if (apicid != BAD_APICID && apicid >> 4 == cluster) {
> +                        cmsk = per_cpu(cluster_masks, cpu_i);
> +                        if (cmsk)
> +                                break;
> +                }
> +        }
> +        if (!cmsk) {
> +                cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
> +        }
> +        if (!cmsk)
> +                return -ENOMEM;

This can be,

        if (!cmsk) {
                cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
                if (!cmsk)
                        return -ENOMEM;
        }

which IMO is more intuitive, and it also "fixes" the unnecessary braces in the
initial check.

> +
> +        cmsk->node = node;
> +        cmsk->clusterid = cluster;
> +
> +        per_cpu(cluster_masks, cpu) = cmsk;
> +
> +        /*
> +         * As an optimisation during boot, set the cluster_mask for *all*
> +         * present CPUs at once, to prevent *each* of them having to iterate
> +         * over the others to find the existing cluster_mask.
>           */
> -        if (cluster_hotplug_mask) {
> -                if (cluster_hotplug_mask->node == node)
> -                        return 0;
> -                kfree(cluster_hotplug_mask);
> +        if (system_state < SYSTEM_RUNNING) {

This can be

        if (system_state >= SYSTEM_RUNNING)
                return 0;

to reduce indentation below.

> +                for_each_present_cpu(cpu) {

Reusing @cpu here is all kinds of confusing.

> +                        u32 apicid = apic->cpu_present_to_apicid(cpu);

This shadows apicid.  That's completely unnecessary.

> +                        if (apicid != BAD_APICID && apicid >> 4 == cluster) {

A helper for retrieving the cluster from a cpu would dedup at least three
instances of this pattern.

> +                                struct cluster_mask **cpu_cmsk = &per_cpu(cluster_masks, cpu);
> +                                if (*cpu_cmsk)
> +                                        BUG_ON(*cpu_cmsk != cmsk);
> +                                else
> +                                        *cpu_cmsk = cmsk;

The if statement is a little confusing because of the double pointer.  BUG_ON()
won't return, maybe write it like so?

        BUG_ON(*cpu_cmsk && *cpu_cmsk != cmsk);
        *cpu_cmsk = cmsk;

> +                        }
> +                }
>          }
> 
> -        cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
> -                                            GFP_KERNEL, node);
> -        if (!cluster_hotplug_mask)
> -                return -ENOMEM;
> -        cluster_hotplug_mask->node = node;
>          return 0;
>  }
> 
>  static int x2apic_prepare_cpu(unsigned int cpu)
>  {
> -        if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
> +        u32 phys_apicid = apic->cpu_present_to_apicid(cpu);
> +        u32 cluster = phys_apicid >> 4;
> +        u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf));
> +
> +        x86_cpu_to_logical_apicid[cpu] = logical_apicid;
> +
> +        if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0)
>                  return -ENOMEM;
>          if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
>                  return -ENOMEM;
> --
> 2.31.1
> 
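As an aside, the helper Sean is asking for could look roughly like the sketch
below. This is only an illustration: the name cpu_to_cluster() and its
return convention are invented here, and the follow-up further down settles
on a simpler apic_cluster() macro applied to the APIC ID instead.

```c
/*
 * Hypothetical helper along the lines Sean suggests: derive the x2APIC
 * cluster for a present CPU in one place, instead of open-coding the
 * "apicid != BAD_APICID && apicid >> 4 == cluster" pattern at each call site.
 * Name and failure convention are illustrative only.
 */
static u32 cpu_to_cluster(unsigned int cpu)
{
	u32 apicid = apic->cpu_present_to_apicid(cpu);

	if (apicid == BAD_APICID)
		return BAD_APICID;	/* propagate "no valid APIC ID" */

	return apicid >> 4;		/* 16 logical IDs per x2APIC cluster */
}
```

Call sites would then collapse to a single comparison such as
`if (cpu_to_cluster(cpu_i) == cluster)`.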
On Tue, 2021-12-14 at 17:10 +0000, Sean Christopherson wrote:
> On Tue, Dec 14, 2021, David Woodhouse wrote:
[...]
> > +                for_each_present_cpu(cpu) {
> 
> Reusing @cpu here is all kinds of confusing.
> 
> > +                        u32 apicid = apic->cpu_present_to_apicid(cpu);
> 
> This shadows apicid.  That's completely unnecessary.
> 
> > +                        if (apicid != BAD_APICID && apicid >> 4 == cluster) {
> 
> A helper for retrieving the cluster from a cpu would dedup at least three
> instances of this pattern.

Thanks. Let's just lift the whole thing out into a separate function. And
actually now I come to stare harder at it, the whole of the struct
cluster_mask can go away too.
Looks a bit more like this now:

---
 arch/x86/kernel/apic/x2apic_cluster.c | 108 +++++++++++++++-----------
 1 file changed, 62 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e696e22d0531..e116dfaf5922 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -9,11 +9,7 @@
 #include "local.h"
 
-struct cluster_mask {
-        unsigned int clusterid;
-        int node;
-        struct cpumask mask;
-};
+#define apic_cluster(apicid) ((apicid) >> 4)
 
 /*
  * __x2apic_send_IPI_mask() possibly needs to read
@@ -23,8 +19,7 @@ struct cluster_mask {
 static u32 *x86_cpu_to_logical_apicid __read_mostly;
 
 static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
-static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
-static struct cluster_mask *cluster_hotplug_mask;
+static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks);
 
 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
@@ -60,10 +55,10 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 
         /* Collapse cpus in a cluster so a single IPI per cluster is sent */
         for_each_cpu(cpu, tmpmsk) {
-                struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
+                struct cpumask *cmsk = per_cpu(cluster_masks, cpu);
 
                 dest = 0;
-                for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
+                for_each_cpu_and(clustercpu, tmpmsk, cmsk)
                         dest |= x86_cpu_to_logical_apicid[clustercpu];
 
                 if (!dest)
@@ -71,7 +66,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
                 __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
 
                 /* Remove cluster CPUs from tmpmask */
-                cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
+                cpumask_andnot(tmpmsk, tmpmsk, cmsk);
         }
 
         local_irq_restore(flags);
@@ -105,55 +100,76 @@ static u32 x2apic_calc_apicid(unsigned int cpu)
 
 static void init_x2apic_ldr(void)
 {
-        struct cluster_mask *cmsk = this_cpu_read(cluster_masks);
-        u32 cluster, apicid = apic_read(APIC_LDR);
-        unsigned int cpu;
+        struct cpumask *cmsk = this_cpu_read(cluster_masks);
 
-        x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
+        BUG_ON(!cmsk);
 
-        if (cmsk)
-                goto update;
-
-        cluster = apicid >> 16;
-        for_each_online_cpu(cpu) {
-                cmsk = per_cpu(cluster_masks, cpu);
-                /* Matching cluster found. Link and update it. */
-                if (cmsk && cmsk->clusterid == cluster)
-                        goto update;
+        cpumask_set_cpu(smp_processor_id(), cmsk);
+}
+
+/*
+ * As an optimisation during boot, set the cluster_mask for *all*
+ * present CPUs at once, to prevent *each* of them having to iterate
+ * over the others to find the existing cluster_mask.
+ */
+static void prefill_clustermask(struct cpumask *cmsk, u32 cluster)
+{
+        int cpu;
+
+        for_each_present_cpu(cpu) {
+                u32 apicid = apic->cpu_present_to_apicid(cpu);
+                if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) {
+                        struct cpumask **cpu_cmsk = &per_cpu(cluster_masks, cpu);
+
+                        BUG_ON(*cpu_cmsk && *cpu_cmsk != cmsk);
+                        *cpu_cmsk = cmsk;
+                }
         }
-        cmsk = cluster_hotplug_mask;
-        cmsk->clusterid = cluster;
-        cluster_hotplug_mask = NULL;
-update:
-        this_cpu_write(cluster_masks, cmsk);
-        cpumask_set_cpu(smp_processor_id(), &cmsk->mask);
 }
 
-static int alloc_clustermask(unsigned int cpu, int node)
+static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
 {
+        struct cpumask *cmsk = NULL;
+        unsigned int cpu_i;
+        u32 apicid;
+
         if (per_cpu(cluster_masks, cpu))
                 return 0;
-        /*
-         * If a hotplug spare mask exists, check whether it's on the right
-         * node. If not, free it and allocate a new one.
-         */
-        if (cluster_hotplug_mask) {
-                if (cluster_hotplug_mask->node == node)
-                        return 0;
-                kfree(cluster_hotplug_mask);
+
+        /* For the hotplug case, don't always allocate a new one */
+        if (system_state >= SYSTEM_RUNNING) {
+                for_each_present_cpu(cpu_i) {
+                        apicid = apic->cpu_present_to_apicid(cpu_i);
+                        if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) {
+                                cmsk = per_cpu(cluster_masks, cpu_i);
+                                if (cmsk)
+                                        break;
+                        }
+                }
+        }
+        if (!cmsk) {
+                cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
+                if (!cmsk)
+                        return -ENOMEM;
         }
-        cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
-                                            GFP_KERNEL, node);
-        if (!cluster_hotplug_mask)
-                return -ENOMEM;
-        cluster_hotplug_mask->node = node;
+        per_cpu(cluster_masks, cpu) = cmsk;
+
+        if (system_state < SYSTEM_RUNNING)
+                prefill_clustermask(cmsk, cluster);
+
         return 0;
 }
 
 static int x2apic_prepare_cpu(unsigned int cpu)
 {
-        if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
+        u32 phys_apicid = apic->cpu_present_to_apicid(cpu);
+        u32 cluster = apic_cluster(phys_apicid);
+        u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf));
+
+        x86_cpu_to_logical_apicid[cpu] = logical_apicid;
+
+        if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0)
                 return -ENOMEM;
         if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
                 return -ENOMEM;
@@ -162,10 +178,10 @@ static int x2apic_prepare_cpu(unsigned int cpu)
 
 static int x2apic_dead_cpu(unsigned int dead_cpu)
 {
-        struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
+        struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu);
 
         if (cmsk)
-                cpumask_clear_cpu(dead_cpu, &cmsk->mask);
+                cpumask_clear_cpu(dead_cpu, cmsk);
         free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
         return 0;
 }
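For readers not steeped in x2APIC: the logical_apicid computed in
x2apic_prepare_cpu() above follows the x2APIC logical destination layout,
with the cluster ID (physical APIC ID >> 4) in bits 31:16 and a one-hot
"position within the 16-CPU cluster" in bits 15:0. A minimal standalone
sketch of that arithmetic (ordinary userspace C, not kernel code, mirroring
the expression in the patch):

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Same derivation as x2apic_prepare_cpu() above: cluster ID in the high
 * 16 bits, one-hot position within the cluster in the low 16 bits.
 */
static uint32_t x2apic_logical_id(uint32_t phys_apicid)
{
	uint32_t cluster = phys_apicid >> 4;	/* 16 CPUs per cluster */

	return (cluster << 16) | (1u << (phys_apicid & 0xf));
}

int main(void)
{
	/* e.g. physical APIC ID 0x23 -> cluster 0x2, bit 3 -> 0x00020008 */
	for (uint32_t apicid = 0; apicid < 40; apicid += 7)
		printf("phys %#04x -> logical %#010x\n",
		       apicid, x2apic_logical_id(apicid));
	return 0;
}
```

This also makes it easy to see why two CPUs share a cluster mask exactly when
their physical APIC IDs agree in everything but the low four bits.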
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e696e22d0531..4ff6a6005ad6 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -24,7 +24,6 @@ static u32 *x86_cpu_to_logical_apicid __read_mostly;
 
 static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
 static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
-static struct cluster_mask *cluster_hotplug_mask;
 
 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
@@ -106,54 +105,71 @@ static u32 x2apic_calc_apicid(unsigned int cpu)
 static void init_x2apic_ldr(void)
 {
         struct cluster_mask *cmsk = this_cpu_read(cluster_masks);
-        u32 cluster, apicid = apic_read(APIC_LDR);
-        unsigned int cpu;
 
-        x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
+        BUG_ON(!cmsk);
 
-        if (cmsk)
-                goto update;
-
-        cluster = apicid >> 16;
-        for_each_online_cpu(cpu) {
-                cmsk = per_cpu(cluster_masks, cpu);
-                /* Matching cluster found. Link and update it. */
-                if (cmsk && cmsk->clusterid == cluster)
-                        goto update;
-        }
-        cmsk = cluster_hotplug_mask;
-        cmsk->clusterid = cluster;
-        cluster_hotplug_mask = NULL;
-update:
-        this_cpu_write(cluster_masks, cmsk);
         cpumask_set_cpu(smp_processor_id(), &cmsk->mask);
 }
 
-static int alloc_clustermask(unsigned int cpu, int node)
+static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
 {
+        struct cluster_mask *cmsk = NULL;
+        unsigned int cpu_i;
+        u32 apicid;
+
         if (per_cpu(cluster_masks, cpu))
                 return 0;
-        /*
-         * If a hotplug spare mask exists, check whether it's on the right
-         * node. If not, free it and allocate a new one.
+
+        /* For the hotplug case, don't always allocate a new one */
+        for_each_present_cpu(cpu_i) {
+                apicid = apic->cpu_present_to_apicid(cpu_i);
+                if (apicid != BAD_APICID && apicid >> 4 == cluster) {
+                        cmsk = per_cpu(cluster_masks, cpu_i);
+                        if (cmsk)
+                                break;
+                }
+        }
+        if (!cmsk) {
+                cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
+        }
+        if (!cmsk)
+                return -ENOMEM;
+
+        cmsk->node = node;
+        cmsk->clusterid = cluster;
+
+        per_cpu(cluster_masks, cpu) = cmsk;
+
+        /*
+         * As an optimisation during boot, set the cluster_mask for *all*
+         * present CPUs at once, to prevent *each* of them having to iterate
+         * over the others to find the existing cluster_mask.
          */
-        if (cluster_hotplug_mask) {
-                if (cluster_hotplug_mask->node == node)
-                        return 0;
-                kfree(cluster_hotplug_mask);
+        if (system_state < SYSTEM_RUNNING) {
+                for_each_present_cpu(cpu) {
+                        u32 apicid = apic->cpu_present_to_apicid(cpu);
+                        if (apicid != BAD_APICID && apicid >> 4 == cluster) {
+                                struct cluster_mask **cpu_cmsk = &per_cpu(cluster_masks, cpu);
+                                if (*cpu_cmsk)
+                                        BUG_ON(*cpu_cmsk != cmsk);
+                                else
+                                        *cpu_cmsk = cmsk;
+                        }
+                }
         }
-        cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
-                                            GFP_KERNEL, node);
-        if (!cluster_hotplug_mask)
-                return -ENOMEM;
-        cluster_hotplug_mask->node = node;
+
         return 0;
 }
 
 static int x2apic_prepare_cpu(unsigned int cpu)
 {
-        if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
+        u32 phys_apicid = apic->cpu_present_to_apicid(cpu);
+        u32 cluster = phys_apicid >> 4;
+        u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf));
+
+        x86_cpu_to_logical_apicid[cpu] = logical_apicid;
+
+        if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0)
                 return -ENOMEM;
         if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
                 return -ENOMEM;