Message ID | 20201007122046.1113577-4-dwmw2@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Fix x2apic enablement and allow up to 32768 CPUs without IR where supported | expand |
On Wed, Oct 07 2020 at 13:20, David Woodhouse wrote: > > + /* > + * If the hypervisor supports extended destination ID in IOAPIC > + * and MSI, that increases the maximum APIC ID that can be used > + * for non-remapped IRQ domains. > + */ > + if (x86_init.hyper.msi_ext_dest_id()) { > + msi_ext_dest_id = 1; > + apic_limit = 32767; > + } This needs to be outside of the remap mode check because? > + > if (remap_mode != IRQ_REMAP_X2APIC_MODE) { > /* > * Using X2APIC without IR is not architecturally supported > @@ -1856,9 +1868,10 @@ static __init void try_to_enable_x2apic(int remap_mode) > * in physical mode, and CPUs with an APIC ID that cannnot > * be addressed must not be brought online. > */ > - x2apic_set_max_apicid(255); > + x2apic_set_max_apicid(apic_limit); > x2apic_phys = 1; > } > + > x2apic_enable(); > } > > diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c > index 2825e003259c..85206f971284 100644 > --- a/arch/x86/kernel/apic/msi.c > +++ b/arch/x86/kernel/apic/msi.c > @@ -23,8 +23,11 @@ > > struct irq_domain *x86_pci_msi_default_domain __ro_after_init; > > +int msi_ext_dest_id __ro_after_init; bool please. Aside of that this breaks the build for a kernel with CONFIG_PCI_MSI=n Thanks, tglx
On Thu, Oct 08 2020 at 13:54, Thomas Gleixner wrote:
> On Wed, Oct 07 2020 at 13:20, David Woodhouse wrote:
>> diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
>> index 2825e003259c..85206f971284 100644
>> --- a/arch/x86/kernel/apic/msi.c
>> +++ b/arch/x86/kernel/apic/msi.c
>> @@ -23,8 +23,11 @@
>>
>> struct irq_domain *x86_pci_msi_default_domain __ro_after_init;
>>
>> +int msi_ext_dest_id __ro_after_init;
>
> bool please.
>
> Aside of that this breaks the build for a kernel with CONFIG_PCI_MSI=n

So this wants to be

    bool virt_ext_dest_id __ro_after_init;

in apic.c and then please make the IO/APIC places depend on this as well
so any change to the utilization of the reserved IO/APIC bits in the
future is not going to end up in surprises.

Thanks,

        tglx
On Thu, 2020-10-08 at 13:54 +0200, Thomas Gleixner wrote: > On Wed, Oct 07 2020 at 13:20, David Woodhouse wrote: > > > > + /* > > + * If the hypervisor supports extended destination ID in IOAPIC > > + * and MSI, that increases the maximum APIC ID that can be used > > + * for non-remapped IRQ domains. > > + */ > > + if (x86_init.hyper.msi_ext_dest_id()) { > > + msi_ext_dest_id = 1; > > + apic_limit = 32767; > > + } > > This needs to be outside of the remap mode check because? Once upon a time, there was a later patch in the series which *also* used the apic_limit variable to generate a maximum affinity mask. Now we've ditched that idea, I can put this back inside the remap mode check. > > > + > > if (remap_mode != IRQ_REMAP_X2APIC_MODE) { > > /* > > * Using X2APIC without IR is not architecturally supported > > @@ -1856,9 +1868,10 @@ static __init void try_to_enable_x2apic(int remap_mode) > > * in physical mode, and CPUs with an APIC ID that cannnot > > * be addressed must not be brought online. > > */ > > - x2apic_set_max_apicid(255); > > + x2apic_set_max_apicid(apic_limit); > > x2apic_phys = 1; > > } > > + > > x2apic_enable(); > > } > > > > diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c > > index 2825e003259c..85206f971284 100644 > > --- a/arch/x86/kernel/apic/msi.c > > +++ b/arch/x86/kernel/apic/msi.c > > @@ -23,8 +23,11 @@ > > > > struct irq_domain *x86_pci_msi_default_domain __ro_after_init; > > > > +int msi_ext_dest_id __ro_after_init; > > bool please. > > Aside of that this breaks the build for a kernel with CONFIG_PCI_MSI=n Will fix (and rename).
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index e90ac7e9ae2c..25ee8ca0a1f2 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -42,6 +42,7 @@ extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); extern unsigned int boot_cpu_physical_apicid; extern u8 boot_cpu_apic_version; extern unsigned long mp_lapic_addr; +extern int msi_ext_dest_id; #ifdef CONFIG_X86_LOCAL_APIC extern int smp_found_config; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 397196fae24d..5af3fe9e38f3 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -114,6 +114,7 @@ struct x86_init_pci { * @init_platform: platform setup * @guest_late_init: guest late init * @x2apic_available: X2APIC detection + * @msi_ext_dest_id: MSI and IOAPIC support 15-bit APIC IDs * @init_mem_mapping: setup early mappings during init_mem_mapping() * @init_after_bootmem: guest init after boot allocator is finished */ @@ -121,6 +122,7 @@ struct x86_hyper_init { void (*init_platform)(void); void (*guest_late_init)(void); bool (*x2apic_available)(void); + bool (*msi_ext_dest_id)(void); void (*init_mem_mapping)(void); void (*init_after_bootmem)(void); }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 113f6ca7b828..ba24a343c1f2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1837,9 +1837,21 @@ static __init void x2apic_enable(void) static __init void try_to_enable_x2apic(int remap_mode) { + u32 apic_limit = 255; + if (x2apic_state == X2APIC_DISABLED) return; + /* + * If the hypervisor supports extended destination ID in IOAPIC + * and MSI, that increases the maximum APIC ID that can be used + * for non-remapped IRQ domains. 
+ */ + if (x86_init.hyper.msi_ext_dest_id()) { + msi_ext_dest_id = 1; + apic_limit = 32767; + } + if (remap_mode != IRQ_REMAP_X2APIC_MODE) { /* * Using X2APIC without IR is not architecturally supported @@ -1856,9 +1868,10 @@ static __init void try_to_enable_x2apic(int remap_mode) * in physical mode, and CPUs with an APIC ID that cannnot * be addressed must not be brought online. */ - x2apic_set_max_apicid(255); + x2apic_set_max_apicid(apic_limit); x2apic_phys = 1; } + x2apic_enable(); } diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 2825e003259c..85206f971284 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -23,8 +23,11 @@ struct irq_domain *x86_pci_msi_default_domain __ro_after_init; +int msi_ext_dest_id __ro_after_init; + static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, bool dmar) + { msg->address_hi = MSI_ADDR_BASE_HI; @@ -46,10 +49,15 @@ static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, * Only the IOMMU itself can use the trick of putting destination * APIC ID into the high bits of the address. Anything else would * just be writing to memory if it tried that, and needs IR to - * address higher APIC IDs. + * address APICs which can't be addressed in the normal 32-bit + * address range at 0xFFExxxxx. That is typically just 8 bits, but + * some hypervisors allow the extended destination ID field in bits + * 11-5 to be used, giving support for 15 bits of APIC IDs in total. 
*/ if (dmar) msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); + else if (msi_ext_dest_id && cfg->dest_apicid < 0x8000) + msg->address_lo |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid) >> 3; else WARN_ON_ONCE(MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid)); } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a3038d8deb6a..8b395821cb8d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -110,6 +110,7 @@ struct x86_init_ops x86_init __initdata = { .init_platform = x86_init_noop, .guest_late_init = x86_init_noop, .x2apic_available = bool_x86_init_noop, + .msi_ext_dest_id = bool_x86_init_noop, .init_mem_mapping = x86_init_noop, .init_after_bootmem = x86_init_noop, },