Message ID | 20230302111227.2102545-1-usama.arif@bytedance.com (mailing list archive) |
---|---|
Headers | show |
Series | Parallel CPU bringup for x86_64 | expand |
On Thu, 2023-03-02 at 11:12 +0000, Usama Arif wrote: > The main code change over v12 is to fix the build error when > CONFIG_FORCE_NR_CPUS is present. > > The commit message for removing initial stack has also been improved, typos > have been fixed and extra comments have been added to make code clearer. Might something like this make it work in parallel with SEV-SNP? If so, I can clean it up and adjust the C code to actually invoke it... diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index b8357d6ecd47..f25df4bd318e 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -70,6 +70,7 @@ /* GHCBData[63:12] */ \ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) +#ifndef __ASSEMBLY__ /* * SNP Page State Change Operation * @@ -160,6 +161,8 @@ struct snp_psc_desc { #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) +#endif /* __ASSEMBLY__ */ + /* * Error codes related to GHCB input that can be communicated back to the guest * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2. diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index defe76ee9e64..0c26f80f488c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -204,6 +204,7 @@ extern unsigned int smpboot_control; /* Control bits for startup_64 */ #define STARTUP_APICID_CPUID_0B 0x80000000 #define STARTUP_APICID_CPUID_01 0x40000000 +#define STARTUP_APICID_SEV_SNP 0x20000000 #define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | STARTUP_APICID_CPUID_0B) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c35f7c173832..b2571034562c 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -26,7 +26,7 @@ #include <asm/nospec-branch.h> #include <asm/fixmap.h> #include <asm/smp.h> - +#include <asm/sev-common.h> /* * We are not able to switch in one step to the final KERNEL ADDRESS SPACE * because we need identity-mapped pages. @@ -242,6 +242,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) * * Bit 31 STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b) * Bit 30 STARTUP_APICID_CPUID_01 flag (use CPUID 0x01) + * Bit 29 STARTUP_APICID_SEV_SNP flag (CPUID 0x0v via GHCB MSR) * Bit 0-24 CPU# if STARTUP_APICID_CPUID_xx flags are not set */ movl smpboot_control(%rip), %ecx @@ -249,6 +250,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) jnz .Luse_cpuid_0b testl $STARTUP_APICID_CPUID_01, %ecx jnz .Luse_cpuid_01 + testl $STARTUP_APICID_SEV_SNP, %ecx + jnz .Luse_sev_cpuid_0b andl $0x0FFFFFFF, %ecx jmp .Lsetup_cpu @@ -259,6 +262,18 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) shr $24, %edx jmp .Lsetup_AP +.Luse_sev_cpuid_0b: + movl $MSR_AMD64_SEV_ES_GHCB, %ecx + # GHCB_CPUID_REQ(0x0b, GHCB_CPUID_REQ_EDX) + movq $0xc00000040000000b, %rax + xorl %edx, %edx + vmgexit + rdmsr + andl $GHCB_MSR_INFO_MASK, %eax + cmpl $GHCB_MSR_CPUID_RESP, %eax + jne 1f + jmp .Lsetup_AP + .Luse_cpuid_0b: mov $0x0B, %eax xorl %ecx, %ecx
On 3/7/23 08:42, David Woodhouse wrote: > On Thu, 2023-03-02 at 11:12 +0000, Usama Arif wrote: >> The main code change over v12 is to fix the build error when >> CONFIG_FORCE_NR_CPUS is present. >> >> The commit message for removing initial stack has also been improved, typos >> have been fixed and extra comments have been added to make code clearer. > > Might something like this make it work in parallel with SEV-SNP? If so, > I can clean it up and adjust the C code to actually invoke it... This should be ok for both SEV-ES and SEV-SNP. > > diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h > index b8357d6ecd47..f25df4bd318e 100644 > --- a/arch/x86/include/asm/sev-common.h > +++ b/arch/x86/include/asm/sev-common.h > @@ -70,6 +70,7 @@ > /* GHCBData[63:12] */ \ > (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) > > +#ifndef __ASSEMBLY__ > /* > * SNP Page State Change Operation > * > @@ -160,6 +161,8 @@ struct snp_psc_desc { > > #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) > > +#endif /* __ASSEMBLY__ */ > + > /* > * Error codes related to GHCB input that can be communicated back to the guest > * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2. > diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h > index defe76ee9e64..0c26f80f488c 100644 > --- a/arch/x86/include/asm/smp.h > +++ b/arch/x86/include/asm/smp.h > @@ -204,6 +204,7 @@ extern unsigned int smpboot_control; > /* Control bits for startup_64 */ > #define STARTUP_APICID_CPUID_0B 0x80000000 > #define STARTUP_APICID_CPUID_01 0x40000000 > +#define STARTUP_APICID_SEV_SNP 0x20000000 > > #define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | STARTUP_APICID_CPUID_0B) > > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S > index c35f7c173832..b2571034562c 100644 > --- a/arch/x86/kernel/head_64.S > +++ b/arch/x86/kernel/head_64.S > @@ -26,7 +26,7 @@ > #include <asm/nospec-branch.h> > #include <asm/fixmap.h> > #include <asm/smp.h> > - > +#include <asm/sev-common.h> > /* > * We are not able to switch in one step to the final KERNEL ADDRESS SPACE > * because we need identity-mapped pages. > @@ -242,6 +242,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) > * > * Bit 31 STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b) > * Bit 30 STARTUP_APICID_CPUID_01 flag (use CPUID 0x01) > + * Bit 29 STARTUP_APICID_SEV_SNP flag (CPUID 0x0v via GHCB MSR) > * Bit 0-24 CPU# if STARTUP_APICID_CPUID_xx flags are not set > */ > movl smpboot_control(%rip), %ecx > @@ -249,6 +250,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) > jnz .Luse_cpuid_0b > testl $STARTUP_APICID_CPUID_01, %ecx > jnz .Luse_cpuid_01 > + testl $STARTUP_APICID_SEV_SNP, %ecx > + jnz .Luse_sev_cpuid_0b > andl $0x0FFFFFFF, %ecx > jmp .Lsetup_cpu > > @@ -259,6 +262,18 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) > shr $24, %edx > jmp .Lsetup_AP > > +.Luse_sev_cpuid_0b: > + movl $MSR_AMD64_SEV_ES_GHCB, %ecx > + # GHCB_CPUID_REQ(0x0b, GHCB_CPUID_REQ_EDX) > + movq $0xc00000040000000b, %rax > + xorl %edx, %edx > + vmgexit According to the GHCB spec, the GHCB MSR protocol is triggered when the GHCB value is non-zero in bits 11:0. For the CPUID function, bits 63:32 hold the CPUID function, bits 31:30 hold the requested register and bits 11:0 == 0x4. So this should be: /* Set the GHCB MSR to request CPUID 0xB_EDX */ movl $MSR_AMD64_SEV_ES_GHCB, %ecx movl $0xc0000004, %eax movl $0xb, %edx wrmsr /* Perform GHCB MSR protocol */ vmgexit /* * Get the result. After the RDMSR: * EAX should be 0xc0000005 * EDX should have the CPUID register value and since EDX * is the target register, no need to move the result. */ > + rdmsr > + andl $GHCB_MSR_INFO_MASK, %eax > + cmpl $GHCB_MSR_CPUID_RESP, %eax > + jne 1f > + jmp .Lsetup_AP > + > .Luse_cpuid_0b: > mov $0x0B, %eax > xorl %ecx, %ecx > Thanks, Tom >
On Tue, 2023-03-07 at 10:45 -0600, Tom Lendacky wrote: > On 3/7/23 08:42, David Woodhouse wrote: > > On Thu, 2023-03-02 at 11:12 +0000, Usama Arif wrote: > > > The main code change over v12 is to fix the build error when > > > CONFIG_FORCE_NR_CPUS is present. > > > > > > The commit message for removing initial stack has also been improved, typos > > > have been fixed and extra comments have been added to make code clearer. > > > > Might something like this make it work in parallel with SEV-SNP? If so, > > I can clean it up and adjust the C code to actually invoke it... > > This should be ok for both SEV-ES and SEV-SNP. Thanks. So... something like this then? Is static_branch_unlikely(&sev_es_enable_key) the right thing to use, and does that cover SNP too? Pushed to https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-rc8-v12bis From d03aed91e5cfe840f4b18820fe6aed1765687321 Mon Sep 17 00:00:00 2001 From: David Woodhouse <dwmw@amazon.co.uk> Date: Tue, 7 Mar 2023 19:06:50 +0000 Subject: [PATCH] x86/smpboot: Allow parallel bringup for SEV-ES Enable parallel bringup for SEV-ES guests. The APs can't actually execute the CPUID instruction directly during early startup, but they can make the GHCB call directly instead, just as the VC trap handler would do. Factor out a prepare_parallel_bringup() function to help reduce the level of complexity by allowing a simple 'return false' in the bail-out cases/ Thanks to Sabin for talking me through the way this works. Suggested-by: Sabin Rapan <sabrapan@amazon.com> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> --- arch/x86/include/asm/sev-common.h | 3 + arch/x86/include/asm/smp.h | 3 +- arch/x86/kernel/head_64.S | 27 ++++++- arch/x86/kernel/smpboot.c | 112 ++++++++++++++++++------------ 4 files changed, 98 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index b8357d6ecd47..f25df4bd318e 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -70,6 +70,7 @@ /* GHCBData[63:12] */ \ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) +#ifndef __ASSEMBLY__ /* * SNP Page State Change Operation * @@ -160,6 +161,8 @@ struct snp_psc_desc { #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) +#endif /* __ASSEMBLY__ */ + /* * Error codes related to GHCB input that can be communicated back to the guest * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2. diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index defe76ee9e64..b3f67a764bfa 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -204,7 +204,8 @@ extern unsigned int smpboot_control; /* Control bits for startup_64 */ #define STARTUP_APICID_CPUID_0B 0x80000000 #define STARTUP_APICID_CPUID_01 0x40000000 +#define STARTUP_APICID_SEV_ES 0x20000000 -#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | STARTUP_APICID_CPUID_0B) +#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | STARTUP_APICID_CPUID_0B | STARTUP_APICID_SEV_ES) #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c35f7c173832..3f5904eab678 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -26,7 +26,7 @@ #include <asm/nospec-branch.h> #include <asm/fixmap.h> #include <asm/smp.h> - +#include <asm/sev-common.h> /* * We are not able to switch in one step to the final KERNEL ADDRESS SPACE * because we need identity-mapped pages. @@ -242,6 +242,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) * * Bit 31 STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b) * Bit 30 STARTUP_APICID_CPUID_01 flag (use CPUID 0x01) + * Bit 29 STARTUP_APICID_SEV_ES flag (CPUID 0x0b via GHCB MSR) * Bit 0-24 CPU# if STARTUP_APICID_CPUID_xx flags are not set */ movl smpboot_control(%rip), %ecx @@ -249,6 +250,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) jnz .Luse_cpuid_0b testl $STARTUP_APICID_CPUID_01, %ecx jnz .Luse_cpuid_01 + testl $STARTUP_APICID_SEV_ES, %ecx + jnz .Luse_sev_cpuid_0b andl $0x0FFFFFFF, %ecx jmp .Lsetup_cpu @@ -259,6 +262,28 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) shr $24, %edx jmp .Lsetup_AP +.Luse_sev_cpuid_0b: + /* Set the GHCB MSR to request CPUID 0xB_EDX */ + movl $MSR_AMD64_SEV_ES_GHCB, %ecx + movl $(GHCB_CPUID_REQ_EDX << 30) | GHCB_MSR_CPUID_REQ, %eax + movl $0x0B, %edx + wrmsr + + /* Perform GHCB MSR protocol */ + vmgexit + + /* + * Get the result. After the RDMSR: + * EAX should be 0xc0000005 + * EDX should have the CPUID register value and since EDX + * is the target register, no need to move the result. + */ + rdmsr + andl $GHCB_MSR_INFO_MASK, %eax + cmpl $GHCB_MSR_CPUID_RESP, %eax + jne 1f + jmp .Lsetup_AP + .Luse_cpuid_0b: mov $0x0B, %eax xorl %ecx, %ecx diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9d956571ecc1..d194c4ffeef8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1510,6 +1510,71 @@ void __init smp_prepare_cpus_common(void) set_cpu_sibling_map(0); } + +/* + * We can do 64-bit AP bringup in parallel if the CPU reports its APIC + * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC + * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too + * hard. And not for SEV-ES guests because they can't use CPUID that + * early. + */ +static bool __init prepare_parallel_bringup(void) +{ + if (IS_ENABLED(CONFIG_X86_32) || boot_cpu_data.cpuid_level < 1) + return false; + + if (x2apic_mode) { + unsigned int eax, ebx, ecx, edx; + + if (boot_cpu_data.cpuid_level < 0xb) + return false; + + /* + * To support parallel bringup in x2apic mode, the AP will need + * to obtain its APIC ID from CPUID 0x0B, since CPUID 0x01 has + * only 8 bits. Check that it is present and seems correct. + */ + cpuid_count(0xb, 0, &eax, &ebx, &ecx, &edx); + + /* + * AMD says that if executed with an umimplemented level in + * ECX, then it will return all zeroes in EAX. Intel says it + * will return zeroes in both EAX and EBX. Checking only EAX + * should be sufficient. + */ + if (!eax) { + pr_info("Disabling parallel bringup because CPUID 0xb looks untrustworthy\n"); + return false; + } + + if (IS_ENABLED(AMD_MEM_ENCRYPT) && static_branch_unlikely(&sev_es_enable_key)) { + pr_debug("Using SEV-ES CPUID 0xb for parallel CPU startup\n"); + smpboot_control = STARTUP_APICID_SEV_ES; + } else if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) { + /* + * Other forms of memory encryption need to implement a way of + * finding the APs' APIC IDs that early. + */ + return false; + } else { + pr_debug("Using CPUID 0xb for parallel CPU startup\n"); + smpboot_control = STARTUP_APICID_CPUID_0B; + } + } else { + if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + return false; + + /* Without X2APIC, what's in CPUID 0x01 should suffice. */ + pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); + smpboot_control = STARTUP_APICID_CPUID_01; + } + + cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", + native_cpu_kick, NULL); + + return true; +} + /* * Prepare for SMP bootup. * @max_cpus: configured maximum number of CPUs, It is a legacy parameter @@ -1550,51 +1615,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) speculative_store_bypass_ht_init(); - /* - * We can do 64-bit AP bringup in parallel if the CPU reports - * its APIC ID in CPUID (either leaf 0x0B if we need the full - * APIC ID in X2APIC mode, or leaf 0x01 if 8 bits are - * sufficient). Otherwise it's too hard. And not for SEV-ES - * guests because they can't use CPUID that early. - */ - if (IS_ENABLED(CONFIG_X86_32) || boot_cpu_data.cpuid_level < 1 || - (x2apic_mode && boot_cpu_data.cpuid_level < 0xb) || - cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) - do_parallel_bringup = false; - - if (do_parallel_bringup && x2apic_mode) { - unsigned int eax, ebx, ecx, edx; - - /* - * To support parallel bringup in x2apic mode, the AP will need - * to obtain its APIC ID from CPUID 0x0B, since CPUID 0x01 has - * only 8 bits. Check that it is present and seems correct. - */ - cpuid_count(0xb, 0, &eax, &ebx, &ecx, &edx); - - /* - * AMD says that if executed with an umimplemented level in - * ECX, then it will return all zeroes in EAX. Intel says it - * will return zeroes in both EAX and EBX. Checking only EAX - * should be sufficient. - */ - if (eax) { - pr_debug("Using CPUID 0xb for parallel CPU startup\n"); - smpboot_control = STARTUP_APICID_CPUID_0B; - } else { - pr_info("Disabling parallel bringup because CPUID 0xb looks untrustworthy\n"); - do_parallel_bringup = false; - } - } else if (do_parallel_bringup) { - /* Without X2APIC, what's in CPUID 0x01 should suffice. */ - pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); - smpboot_control = STARTUP_APICID_CPUID_01; - } - - if (do_parallel_bringup) { - cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", - native_cpu_kick, NULL); - } + if (do_parallel_bringup) + do_parallel_bringup = prepare_parallel_bringup(); snp_set_wakeup_secondary_cpu(); }
On 3/7/23 13:18, David Woodhouse wrote: > On Tue, 2023-03-07 at 10:45 -0600, Tom Lendacky wrote: >> On 3/7/23 08:42, David Woodhouse wrote: >>> On Thu, 2023-03-02 at 11:12 +0000, Usama Arif wrote: >>>> The main code change over v12 is to fix the build error when >>>> CONFIG_FORCE_NR_CPUS is present. >>>> >>>> The commit message for removing initial stack has also been improved, typos >>>> have been fixed and extra comments have been added to make code clearer. >>> >>> Might something like this make it work in parallel with SEV-SNP? If so, >>> I can clean it up and adjust the C code to actually invoke it... >> >> This should be ok for both SEV-ES and SEV-SNP. > > Thanks. So... something like this then? > > Is static_branch_unlikely(&sev_es_enable_key) the right thing to use, > and does that cover SNP too? Yes, that will cover SNP, too. > > Pushed to > https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-rc8-v12bis > > From d03aed91e5cfe840f4b18820fe6aed1765687321 Mon Sep 17 00:00:00 2001 > From: David Woodhouse <dwmw@amazon.co.uk> > Date: Tue, 7 Mar 2023 19:06:50 +0000 > Subject: [PATCH] x86/smpboot: Allow parallel bringup for SEV-ES > > Enable parallel bringup for SEV-ES guests. The APs can't actually > execute the CPUID instruction directly during early startup, but they > can make the GHCB call directly instead, just as the VC trap handler > would do. > > Factor out a prepare_parallel_bringup() function to help reduce the level > of complexity by allowing a simple 'return false' in the bail-out cases/ > > Thanks to Sabin for talking me through the way this works. > > Suggested-by: Sabin Rapan <sabrapan@amazon.com> > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> > --- > arch/x86/include/asm/sev-common.h | 3 + > arch/x86/include/asm/smp.h | 3 +- > arch/x86/kernel/head_64.S | 27 ++++++- > arch/x86/kernel/smpboot.c | 112 ++++++++++++++++++------------ > 4 files changed, 98 insertions(+), 47 deletions(-) > > diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h > index b8357d6ecd47..f25df4bd318e 100644 > --- a/arch/x86/include/asm/sev-common.h > +++ b/arch/x86/include/asm/sev-common.h > @@ -70,6 +70,7 @@ > /* GHCBData[63:12] */ \ > (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) > > +#ifndef __ASSEMBLY__ > /* > * SNP Page State Change Operation > * > @@ -160,6 +161,8 @@ struct snp_psc_desc { > > #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) > > +#endif /* __ASSEMBLY__ */ > + > /* > * Error codes related to GHCB input that can be communicated back to the guest > * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2. > diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h > index defe76ee9e64..b3f67a764bfa 100644 > --- a/arch/x86/include/asm/smp.h > +++ b/arch/x86/include/asm/smp.h > @@ -204,7 +204,8 @@ extern unsigned int smpboot_control; > /* Control bits for startup_64 */ > #define STARTUP_APICID_CPUID_0B 0x80000000 > #define STARTUP_APICID_CPUID_01 0x40000000 > +#define STARTUP_APICID_SEV_ES 0x20000000 > > -#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | STARTUP_APICID_CPUID_0B) > +#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | STARTUP_APICID_CPUID_0B | STARTUP_APICID_SEV_ES) > > #endif /* _ASM_X86_SMP_H */ > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S > index c35f7c173832..3f5904eab678 100644 > --- a/arch/x86/kernel/head_64.S > +++ b/arch/x86/kernel/head_64.S > @@ -26,7 +26,7 @@ > #include <asm/nospec-branch.h> > #include <asm/fixmap.h> > #include <asm/smp.h> > - > +#include <asm/sev-common.h> > /* > * We are not able to switch in one step to the final KERNEL ADDRESS SPACE > * because we need identity-mapped pages. > @@ -242,6 +242,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) > * > * Bit 31 STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b) > * Bit 30 STARTUP_APICID_CPUID_01 flag (use CPUID 0x01) > + * Bit 29 STARTUP_APICID_SEV_ES flag (CPUID 0x0b via GHCB MSR) > * Bit 0-24 CPU# if STARTUP_APICID_CPUID_xx flags are not set > */ > movl smpboot_control(%rip), %ecx > @@ -249,6 +250,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) > jnz .Luse_cpuid_0b > testl $STARTUP_APICID_CPUID_01, %ecx > jnz .Luse_cpuid_01 > + testl $STARTUP_APICID_SEV_ES, %ecx > + jnz .Luse_sev_cpuid_0b > andl $0x0FFFFFFF, %ecx > jmp .Lsetup_cpu > > @@ -259,6 +262,28 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) > shr $24, %edx > jmp .Lsetup_AP > > +.Luse_sev_cpuid_0b: > + /* Set the GHCB MSR to request CPUID 0xB_EDX */ > + movl $MSR_AMD64_SEV_ES_GHCB, %ecx > + movl $(GHCB_CPUID_REQ_EDX << 30) | GHCB_MSR_CPUID_REQ, %eax > + movl $0x0B, %edx > + wrmsr > + > + /* Perform GHCB MSR protocol */ > + vmgexit > + > + /* > + * Get the result. After the RDMSR: > + * EAX should be 0xc0000005 > + * EDX should have the CPUID register value and since EDX > + * is the target register, no need to move the result. > + */ > + rdmsr > + andl $GHCB_MSR_INFO_MASK, %eax > + cmpl $GHCB_MSR_CPUID_RESP, %eax > + jne 1f > + jmp .Lsetup_AP > + > .Luse_cpuid_0b: > mov $0x0B, %eax > xorl %ecx, %ecx > diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c > index 9d956571ecc1..d194c4ffeef8 100644 > --- a/arch/x86/kernel/smpboot.c > +++ b/arch/x86/kernel/smpboot.c > @@ -1510,6 +1510,71 @@ void __init smp_prepare_cpus_common(void) > set_cpu_sibling_map(0); > } > > + > +/* > + * We can do 64-bit AP bringup in parallel if the CPU reports its APIC > + * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC > + * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too > + * hard. And not for SEV-ES guests because they can't use CPUID that > + * early. > + */ > +static bool __init prepare_parallel_bringup(void) > +{ > + if (IS_ENABLED(CONFIG_X86_32) || boot_cpu_data.cpuid_level < 1) > + return false; > + > + if (x2apic_mode) { > + unsigned int eax, ebx, ecx, edx; > + > + if (boot_cpu_data.cpuid_level < 0xb) > + return false; > + > + /* > + * To support parallel bringup in x2apic mode, the AP will need > + * to obtain its APIC ID from CPUID 0x0B, since CPUID 0x01 has > + * only 8 bits. Check that it is present and seems correct. > + */ > + cpuid_count(0xb, 0, &eax, &ebx, &ecx, &edx); > + > + /* > + * AMD says that if executed with an umimplemented level in > + * ECX, then it will return all zeroes in EAX. Intel says it > + * will return zeroes in both EAX and EBX. Checking only EAX > + * should be sufficient. > + */ > + if (!eax) { > + pr_info("Disabling parallel bringup because CPUID 0xb looks untrustworthy\n"); > + return false; > + } > + > + if (IS_ENABLED(AMD_MEM_ENCRYPT) && static_branch_unlikely(&sev_es_enable_key)) { This should be IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) Let me take this patch and run some tests to confirm that everything works as expected. Thanks! Tom > + pr_debug("Using SEV-ES CPUID 0xb for parallel CPU startup\n"); > + smpboot_control = STARTUP_APICID_SEV_ES; > + } else if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) { > + /* > + * Other forms of memory encryption need to implement a way of > + * finding the APs' APIC IDs that early. > + */ > + return false; > + } else { > + pr_debug("Using CPUID 0xb for parallel CPU startup\n"); > + smpboot_control = STARTUP_APICID_CPUID_0B; > + } > + } else { > + if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) > + return false; > + > + /* Without X2APIC, what's in CPUID 0x01 should suffice. */ > + pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); > + smpboot_control = STARTUP_APICID_CPUID_01; > + } > + > + cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", > + native_cpu_kick, NULL); > + > + return true; > +} > + > /* > * Prepare for SMP bootup. > * @max_cpus: configured maximum number of CPUs, It is a legacy parameter > @@ -1550,51 +1615,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) > > speculative_store_bypass_ht_init(); > > - /* > - * We can do 64-bit AP bringup in parallel if the CPU reports > - * its APIC ID in CPUID (either leaf 0x0B if we need the full > - * APIC ID in X2APIC mode, or leaf 0x01 if 8 bits are > - * sufficient). Otherwise it's too hard. And not for SEV-ES > - * guests because they can't use CPUID that early. > - */ > - if (IS_ENABLED(CONFIG_X86_32) || boot_cpu_data.cpuid_level < 1 || > - (x2apic_mode && boot_cpu_data.cpuid_level < 0xb) || > - cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) > - do_parallel_bringup = false; > - > - if (do_parallel_bringup && x2apic_mode) { > - unsigned int eax, ebx, ecx, edx; > - > - /* > - * To support parallel bringup in x2apic mode, the AP will need > - * to obtain its APIC ID from CPUID 0x0B, since CPUID 0x01 has > - * only 8 bits. Check that it is present and seems correct. > - */ > - cpuid_count(0xb, 0, &eax, &ebx, &ecx, &edx); > - > - /* > - * AMD says that if executed with an umimplemented level in > - * ECX, then it will return all zeroes in EAX. Intel says it > - * will return zeroes in both EAX and EBX. Checking only EAX > - * should be sufficient. > - */ > - if (eax) { > - pr_debug("Using CPUID 0xb for parallel CPU startup\n"); > - smpboot_control = STARTUP_APICID_CPUID_0B; > - } else { > - pr_info("Disabling parallel bringup because CPUID 0xb looks untrustworthy\n"); > - do_parallel_bringup = false; > - } > - } else if (do_parallel_bringup) { > - /* Without X2APIC, what's in CPUID 0x01 should suffice. */ > - pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); > - smpboot_control = STARTUP_APICID_CPUID_01; > - } > - > - if (do_parallel_bringup) { > - cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", > - native_cpu_kick, NULL); > - } > + if (do_parallel_bringup) > + do_parallel_bringup = prepare_parallel_bringup(); > > snp_set_wakeup_secondary_cpu(); > }
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c > index 9d956571ecc1..d194c4ffeef8 100644 > --- a/arch/x86/kernel/smpboot.c > +++ b/arch/x86/kernel/smpboot.c > @@ -1510,6 +1510,71 @@ void __init smp_prepare_cpus_common(void) > set_cpu_sibling_map(0); > } > > + > +/* > + * We can do 64-bit AP bringup in parallel if the CPU reports its APIC > + * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC > + * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too > + * hard. And not for SEV-ES guests because they can't use CPUID that > + * early. > + */ > +static bool __init prepare_parallel_bringup(void) > +{ > + if (IS_ENABLED(CONFIG_X86_32) || boot_cpu_data.cpuid_level < 1) > + return false; > + > + if (x2apic_mode) { > + unsigned int eax, ebx, ecx, edx; > + > + if (boot_cpu_data.cpuid_level < 0xb) > + return false; > + > + /* > + * To support parallel bringup in x2apic mode, the AP will need > + * to obtain its APIC ID from CPUID 0x0B, since CPUID 0x01 has > + * only 8 bits. Check that it is present and seems correct. > + */ > + cpuid_count(0xb, 0, &eax, &ebx, &ecx, &edx); > + > + /* > + * AMD says that if executed with an umimplemented level in > + * ECX, then it will return all zeroes in EAX. Intel says it > + * will return zeroes in both EAX and EBX. Checking only EAX > + * should be sufficient. > + */ > + if (!eax) { > + pr_info("Disabling parallel bringup because CPUID 0xb looks untrustworthy\n"); > + return false; > + } > + > + if (IS_ENABLED(AMD_MEM_ENCRYPT) && static_branch_unlikely(&sev_es_enable_key)) { > + pr_debug("Using SEV-ES CPUID 0xb for parallel CPU startup\n"); > + smpboot_control = STARTUP_APICID_SEV_ES; > + } else if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) { > + /* > + * Other forms of memory encryption need to implement a way of > + * finding the APs' APIC IDs that early. > + */ > + return false; > + } else { > + pr_debug("Using CPUID 0xb for parallel CPU startup\n"); > + smpboot_control = STARTUP_APICID_CPUID_0B; I believe TDX guests with x2apic mode will end up here and enable parallel smp if Sean was correct in this (https://lore.kernel.org/all/Y91PoIfc2jdRv0WG@google.com/). i.e. "TDX guest state is also encrypted, but TDX doesn't return true CC_ATTR_GUEST_STATE_ENCRYPT.". So I believe the above else if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) is not useful as thats set for just SEV-ES guests? which is covered in the if part. Thanks, Usama > + } > + } else { > + if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) > + return false; > + > + /* Without X2APIC, what's in CPUID 0x01 should suffice. */ > + pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); > + smpboot_control = STARTUP_APICID_CPUID_01; > + } > + > + cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", > + native_cpu_kick, NULL); > + > + return true; > +} > + > /* > * Prepare for SMP bootup.
On 3/7/23 14:06, Tom Lendacky wrote: > On 3/7/23 13:18, David Woodhouse wrote: >> On Tue, 2023-03-07 at 10:45 -0600, Tom Lendacky wrote: >>> On 3/7/23 08:42, David Woodhouse wrote: >>>> On Thu, 2023-03-02 at 11:12 +0000, Usama Arif wrote: >>>>> The main code change over v12 is to fix the build error when >>>>> CONFIG_FORCE_NR_CPUS is present. >>>>> >>>>> The commit message for removing initial stack has also been improved, >>>>> typos >>>>> have been fixed and extra comments have been added to make code clearer. >>>> >>>> Might something like this make it work in parallel with SEV-SNP? If so, >>>> I can clean it up and adjust the C code to actually invoke it... >>> >>> This should be ok for both SEV-ES and SEV-SNP. >> >> Thanks. So... something like this then? >> >> Is static_branch_unlikely(&sev_es_enable_key) the right thing to use, >> and does that cover SNP too? > > Yes, that will cover SNP, too. > >> >> Pushed to >> https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-rc8-v12bis >> >> From d03aed91e5cfe840f4b18820fe6aed1765687321 Mon Sep 17 00:00:00 2001 >> From: David Woodhouse <dwmw@amazon.co.uk> >> Date: Tue, 7 Mar 2023 19:06:50 +0000 >> Subject: [PATCH] x86/smpboot: Allow parallel bringup for SEV-ES >> >> Enable parallel bringup for SEV-ES guests. The APs can't actually >> execute the CPUID instruction directly during early startup, but they >> can make the GHCB call directly instead, just as the VC trap handler >> would do. >> >> Factor out a prepare_parallel_bringup() function to help reduce the level >> of complexity by allowing a simple 'return false' in the bail-out cases/ >> >> Thanks to Sabin for talking me through the way this works. >> >> Suggested-by: Sabin Rapan <sabrapan@amazon.com> >> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> I did some Qemu/KVM testing. One thing I noticed is that on AMD, CPUID 0xB EAX will be non-zero only if SMT is enabled. So just booting some guests without CPU topology never did parallel booting ("smpboot: Disabling parallel bringup because CPUID 0xb looks untrustworthy"). I would imagine a bare-metal system that has diabled SMT will not do parallel booting, too (but I haven't had time to test that). I did have to change "vmgexit" to a "rep; vmmcall" based on my binutils and the IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) change I mentioned before. But with that, I was able to successfully boot 64 vCPU SEV-ES and SEV-SNP guests using parallel booting. Thanks, Tom >> --- >> arch/x86/include/asm/sev-common.h | 3 + >> arch/x86/include/asm/smp.h | 3 +- >> arch/x86/kernel/head_64.S | 27 ++++++- >> arch/x86/kernel/smpboot.c | 112 ++++++++++++++++++------------ >> 4 files changed, 98 insertions(+), 47 deletions(-) >> >> diff --git a/arch/x86/include/asm/sev-common.h >> b/arch/x86/include/asm/sev-common.h >> index b8357d6ecd47..f25df4bd318e 100644 >> --- a/arch/x86/include/asm/sev-common.h >> +++ b/arch/x86/include/asm/sev-common.h >> @@ -70,6 +70,7 @@ >> /* GHCBData[63:12] */ \ >> (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) >> +#ifndef __ASSEMBLY__ >> /* >> * SNP Page State Change Operation >> * >> @@ -160,6 +161,8 @@ struct snp_psc_desc { >> #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) >> +#endif /* __ASSEMBLY__ */ >> + >> /* >> * Error codes related to GHCB input that can be communicated back to >> the guest >> * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2. >> diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h >> index defe76ee9e64..b3f67a764bfa 100644 >> --- a/arch/x86/include/asm/smp.h >> +++ b/arch/x86/include/asm/smp.h >> @@ -204,7 +204,8 @@ extern unsigned int smpboot_control; >> /* Control bits for startup_64 */ >> #define STARTUP_APICID_CPUID_0B 0x80000000 >> #define STARTUP_APICID_CPUID_01 0x40000000 >> +#define STARTUP_APICID_SEV_ES 0x20000000 >> -#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | >> STARTUP_APICID_CPUID_0B) >> +#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | >> STARTUP_APICID_CPUID_0B | STARTUP_APICID_SEV_ES) >> #endif /* _ASM_X86_SMP_H */ >> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S >> index c35f7c173832..3f5904eab678 100644 >> --- a/arch/x86/kernel/head_64.S >> +++ b/arch/x86/kernel/head_64.S >> @@ -26,7 +26,7 @@ >> #include <asm/nospec-branch.h> >> #include <asm/fixmap.h> >> #include <asm/smp.h> >> - >> +#include <asm/sev-common.h> >> /* >> * We are not able to switch in one step to the final KERNEL ADDRESS >> SPACE >> * because we need identity-mapped pages. >> @@ -242,6 +242,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, >> SYM_L_GLOBAL) >> * >> * Bit 31 STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b) >> * Bit 30 STARTUP_APICID_CPUID_01 flag (use CPUID 0x01) >> + * Bit 29 STARTUP_APICID_SEV_ES flag (CPUID 0x0b via GHCB MSR) >> * Bit 0-24 CPU# if STARTUP_APICID_CPUID_xx flags are not set >> */ >> movl smpboot_control(%rip), %ecx >> @@ -249,6 +250,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, >> SYM_L_GLOBAL) >> jnz .Luse_cpuid_0b >> testl $STARTUP_APICID_CPUID_01, %ecx >> jnz .Luse_cpuid_01 >> + testl $STARTUP_APICID_SEV_ES, %ecx >> + jnz .Luse_sev_cpuid_0b >> andl $0x0FFFFFFF, %ecx >> jmp .Lsetup_cpu >> @@ -259,6 +262,28 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, >> SYM_L_GLOBAL) >> shr $24, %edx >> jmp .Lsetup_AP >> +.Luse_sev_cpuid_0b: >> + /* Set the GHCB MSR to request CPUID 0xB_EDX */ >> + movl $MSR_AMD64_SEV_ES_GHCB, %ecx >> + movl $(GHCB_CPUID_REQ_EDX << 30) | GHCB_MSR_CPUID_REQ, %eax >> + movl $0x0B, %edx >> + wrmsr >> + >> + /* Perform GHCB MSR protocol */ >> + vmgexit >> + >> + /* >> + * Get the result. After the RDMSR: >> + * EAX should be 0xc0000005 >> + * EDX should have the CPUID register value and since EDX >> + * is the target register, no need to move the result. >> + */ >> + rdmsr >> + andl $GHCB_MSR_INFO_MASK, %eax >> + cmpl $GHCB_MSR_CPUID_RESP, %eax >> + jne 1f >> + jmp .Lsetup_AP >> + >> .Luse_cpuid_0b: >> mov $0x0B, %eax >> xorl %ecx, %ecx >> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c >> index 9d956571ecc1..d194c4ffeef8 100644 >> --- a/arch/x86/kernel/smpboot.c >> +++ b/arch/x86/kernel/smpboot.c >> @@ -1510,6 +1510,71 @@ void __init smp_prepare_cpus_common(void) >> set_cpu_sibling_map(0); >> } >> + >> +/* >> + * We can do 64-bit AP bringup in parallel if the CPU reports its APIC >> + * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC >> + * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too >> + * hard. And not for SEV-ES guests because they can't use CPUID that >> + * early. >> + */ >> +static bool __init prepare_parallel_bringup(void) >> +{ >> + if (IS_ENABLED(CONFIG_X86_32) || boot_cpu_data.cpuid_level < 1) >> + return false; >> + >> + if (x2apic_mode) { >> + unsigned int eax, ebx, ecx, edx; >> + >> + if (boot_cpu_data.cpuid_level < 0xb) >> + return false; >> + >> + /* >> + * To support parallel bringup in x2apic mode, the AP will need >> + * to obtain its APIC ID from CPUID 0x0B, since CPUID 0x01 has >> + * only 8 bits. Check that it is present and seems correct. >> + */ >> + cpuid_count(0xb, 0, &eax, &ebx, &ecx, &edx); >> + >> + /* >> + * AMD says that if executed with an umimplemented level in >> + * ECX, then it will return all zeroes in EAX. Intel says it >> + * will return zeroes in both EAX and EBX. Checking only EAX >> + * should be sufficient. >> + */ >> + if (!eax) { >> + pr_info("Disabling parallel bringup because CPUID 0xb looks >> untrustworthy\n"); >> + return false; >> + } >> + >> + if (IS_ENABLED(AMD_MEM_ENCRYPT) && >> static_branch_unlikely(&sev_es_enable_key)) { > > This should be IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) > > Let me take this patch and run some tests to confirm that everything works > as expected. > > Thanks! > Tom > >> + pr_debug("Using SEV-ES CPUID 0xb for parallel CPU startup\n"); >> + smpboot_control = STARTUP_APICID_SEV_ES; >> + } else if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) { >> + /* >> + * Other forms of memory encryption need to implement a way of >> + * finding the APs' APIC IDs that early. >> + */ >> + return false; >> + } else { >> + pr_debug("Using CPUID 0xb for parallel CPU startup\n"); >> + smpboot_control = STARTUP_APICID_CPUID_0B; >> + } >> + } else { >> + if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) >> + return false; >> + >> + /* Without X2APIC, what's in CPUID 0x01 should suffice. */ >> + pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); >> + smpboot_control = STARTUP_APICID_CPUID_01; >> + } >> + >> + cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", >> + native_cpu_kick, NULL); >> + >> + return true; >> +} >> + >> /* >> * Prepare for SMP bootup. >> * @max_cpus: configured maximum number of CPUs, It is a legacy parameter >> @@ -1550,51 +1615,8 @@ void __init native_smp_prepare_cpus(unsigned int >> max_cpus) >> speculative_store_bypass_ht_init(); >> - /* >> - * We can do 64-bit AP bringup in parallel if the CPU reports >> - * its APIC ID in CPUID (either leaf 0x0B if we need the full >> - * APIC ID in X2APIC mode, or leaf 0x01 if 8 bits are >> - * sufficient). Otherwise it's too hard. And not for SEV-ES >> - * guests because they can't use CPUID that early. >> - */ >> - if (IS_ENABLED(CONFIG_X86_32) || boot_cpu_data.cpuid_level < 1 || >> - (x2apic_mode && boot_cpu_data.cpuid_level < 0xb) || >> - cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) >> - do_parallel_bringup = false; >> - >> - if (do_parallel_bringup && x2apic_mode) { >> - unsigned int eax, ebx, ecx, edx; >> - >> - /* >> - * To support parallel bringup in x2apic mode, the AP will need >> - * to obtain its APIC ID from CPUID 0x0B, since CPUID 0x01 has >> - * only 8 bits. Check that it is present and seems correct. >> - */ >> - cpuid_count(0xb, 0, &eax, &ebx, &ecx, &edx); >> - >> - /* >> - * AMD says that if executed with an umimplemented level in >> - * ECX, then it will return all zeroes in EAX. Intel says it >> - * will return zeroes in both EAX and EBX. Checking only EAX >> - * should be sufficient. >> - */ >> - if (eax) { >> - pr_debug("Using CPUID 0xb for parallel CPU startup\n"); >> - smpboot_control = STARTUP_APICID_CPUID_0B; >> - } else { >> - pr_info("Disabling parallel bringup because CPUID 0xb looks >> untrustworthy\n"); >> - do_parallel_bringup = false; >> - } >> - } else if (do_parallel_bringup) { >> - /* Without X2APIC, what's in CPUID 0x01 should suffice. */ >> - pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); >> - smpboot_control = STARTUP_APICID_CPUID_01; >> - } >> - >> - if (do_parallel_bringup) { >> - cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", >> - native_cpu_kick, NULL); >> - } >> + if (do_parallel_bringup) >> + do_parallel_bringup = prepare_parallel_bringup(); >> snp_set_wakeup_secondary_cpu(); >> }
On Tue, 2023-03-07 at 16:22 -0600, Tom Lendacky wrote: > > I did some Qemu/KVM testing. One thing I noticed is that on AMD, CPUID 0xB > EAX will be non-zero only if SMT is enabled. So just booting some guests > without CPU topology never did parallel booting ("smpboot: Disabling > parallel bringup because CPUID 0xb looks untrustworthy"). I would imagine > a bare-metal system that has diabled SMT will not do parallel booting, too > (but I haven't had time to test that). Interesting, thanks. Should I change to checking for *both* EAX and EBX being zero? That's what I did first, after reading only the Intel SDM. But I changed to only EAX because the AMD doc only says that EAX will be zero for unsupported leaves. > I did have to change "vmgexit" to a "rep; vmmcall" based on my binutils > and the IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) change I mentioned before. But > with that, I was able to successfully boot 64 vCPU SEV-ES and SEV-SNP > guests using parallel booting. Thanks. I'll look at retconning that rework of can_parallel_bringup() out into a separate function, into the earlier parts of the series.
On 3/7/23 16:27, David Woodhouse wrote: > On Tue, 2023-03-07 at 16:22 -0600, Tom Lendacky wrote: >> >> I did some Qemu/KVM testing. One thing I noticed is that on AMD, CPUID 0xB >> EAX will be non-zero only if SMT is enabled. So just booting some guests >> without CPU topology never did parallel booting ("smpboot: Disabling >> parallel bringup because CPUID 0xb looks untrustworthy"). I would imagine >> a bare-metal system that has diabled SMT will not do parallel booting, too >> (but I haven't had time to test that). > > Interesting, thanks. Should I change to checking for *both* EAX and EBX > being zero? That's what I did first, after reading only the Intel SDM. > But I changed to only EAX because the AMD doc only says that EAX will > be zero for unsupported leaves. From a baremetal perspective, I think that works. Rome was the first generation to support x2apic, and the PPR for Rome states that 0's are returned in all 4 registers for undefined function numbers. For virtualization, at least Qemu/KVM, that also looks to be a safe test. Thanks, Tom > >> I did have to change "vmgexit" to a "rep; vmmcall" based on my binutils >> and the IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) change I mentioned before. But >> with that, I was able to successfully boot 64 vCPU SEV-ES and SEV-SNP >> guests using parallel booting. > > Thanks. I'll look at retconning that rework of can_parallel_bringup() > out into a separate function, into the earlier parts of the series.
On Tue, Mar 07, 2023, David Woodhouse wrote: > On Tue, 2023-03-07 at 16:22 -0600, Tom Lendacky wrote: > > > > I did some Qemu/KVM testing. One thing I noticed is that on AMD, CPUID 0xB > > EAX will be non-zero only if SMT is enabled. So just booting some guests > > without CPU topology never did parallel booting ("smpboot: Disabling > > parallel bringup because CPUID 0xb looks untrustworthy"). I would imagine > > a bare-metal system that has diabled SMT will not do parallel booting, too > > (but I haven't had time to test that). > > Interesting, thanks. Should I change to checking for *both* EAX and EBX > being zero? That's what I did first, after reading only the Intel SDM. > But I changed to only EAX because the AMD doc only says that EAX will > be zero for unsupported leaves. LOL, nice. '0' is a prefectly valid output for the shift if there's exactly one CPU at the current level, which is why Intel states that both EAX and EBX are cleared. I assume/hope this is effectively a documentation goof, in that the APM assumes that all "real" CPUs that support CPUID 0xB also support sub-function 0. Nit, the AMD says EAX will be zero for unsupported _levels_. The distinction matters because if the entire leaf is unsupported, AMD behavior is to zero out all output registers, even if the input leaf is above the max supported leaf. Anyways, the kernel already sanity checks the outputs on all x86 CPUs, just piggyback that logic, e.g. expose detect_extended_topology_leaf() or so. If AMD or a VMM is doing something crazy, the kernel is doomed anyways.
On Tue, 2023-03-07 at 15:35 -0800, Sean Christopherson wrote: > On Tue, Mar 07, 2023, David Woodhouse wrote: > > On Tue, 2023-03-07 at 16:22 -0600, Tom Lendacky wrote: > > > > > > I did some Qemu/KVM testing. One thing I noticed is that on AMD, CPUID 0xB > > > EAX will be non-zero only if SMT is enabled. So just booting some guests > > > without CPU topology never did parallel booting ("smpboot: Disabling > > > parallel bringup because CPUID 0xb looks untrustworthy"). I would imagine > > > a bare-metal system that has diabled SMT will not do parallel booting, too > > > (but I haven't had time to test that). > > > > Interesting, thanks. Should I change to checking for *both* EAX and EBX > > being zero? That's what I did first, after reading only the Intel SDM. > > But I changed to only EAX because the AMD doc only says that EAX will > > be zero for unsupported leaves. > > LOL, nice. '0' is a prefectly valid output for the shift if there's exactly one > CPU at the current level, which is why Intel states that both EAX and EBX are > cleared. I assume/hope this is effectively a documentation goof, in that the APM > assumes that all "real" CPUs that support CPUID 0xB also support sub-function 0. > > Nit, the AMD says EAX will be zero for unsupported _levels_. The distinction > matters because if the entire leaf is unsupported, AMD behavior is to zero out > all output registers, even if the input leaf is above the max supported leaf. > > Anyways, the kernel already sanity checks the outputs on all x86 CPUs, just > piggyback that logic, e.g. expose detect_extended_topology_leaf() or so. If AMD > or a VMM is doing something crazy, the kernel is doomed anyways. Well, we have to be somewhat careful with that assumption. Turns out that if CPUID 0xb doesn't have valid data, the kernel *wasn't* doomed anyway. A bunch of our early "WTF doesn't it work on AMD?" issue with this series were due to precisely that. But yeah, check_extended_topology_leaf() does check for EBX being non- zero, and also for SMT_TYPE being in CH. I'll look at just calling that; it'll make the mess of if/else in the prepare_parallel_bringup() function a bit cleaner.
On Tue, 2023-03-07 at 16:55 -0600, Tom Lendacky wrote: > On 3/7/23 16:27, David Woodhouse wrote: > > On Tue, 2023-03-07 at 16:22 -0600, Tom Lendacky wrote: > > > > > > I did some Qemu/KVM testing. One thing I noticed is that on AMD, CPUID 0xB > > > EAX will be non-zero only if SMT is enabled. So just booting some guests > > > without CPU topology never did parallel booting ("smpboot: Disabling > > > parallel bringup because CPUID 0xb looks untrustworthy"). I would imagine > > > a bare-metal system that has diabled SMT will not do parallel booting, too > > > (but I haven't had time to test that). > > > > Interesting, thanks. Should I change to checking for *both* EAX and EBX > > being zero? That's what I did first, after reading only the Intel SDM. > > But I changed to only EAX because the AMD doc only says that EAX will > > be zero for unsupported leaves. > > From a baremetal perspective, I think that works. Rome was the first > generation to support x2apic, and the PPR for Rome states that 0's are > returned in all 4 registers for undefined function numbers. > > For virtualization, at least Qemu/KVM, that also looks to be a safe test. At Sean's suggestion, I've switched it to use the existing check_extended_topology_leaf() which checks for EBX being non-zero, and CH being 1 (SMT_TYPE). I also made it work even if the kernel isn't using x2apic mode (is that even possible, or does SEV-ES require the MSR-based access anyway?) It just looked odd handling SEV-ES in the CPUID 0x0B path but not the CPUID 0x01 case, and I certainly didn't want to implement the asm side for handling CPUID 0x01 via the GHCB protocol. And this way I can pull the check for CC_ATTR_GUEST_STATE_ENCRYPT up above. Which I've kept for now for the reason described in the comment, but I won't die on that hill. https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-v14 Looks like this: /* * We can do 64-bit AP bringup in parallel if the CPU reports its APIC * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too * hard. */ static bool prepare_parallel_bringup(void) { bool has_sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) && static_branch_unlikely(&sev_es_enable_key); if (IS_ENABLED(CONFIG_X86_32)) return false; /* * Encrypted guests other than SEV-ES (in the future) will need to * implement an early way of finding the APIC ID, since they will * presumably block direct CPUID too. Be kind to our future selves * by warning here instead of just letting them break. Parallel * startup doesn't have to be in the first round of enabling patches * for any such technology. */ if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT) || !has_sev_es) { pr_info("Disabling parallel bringup due to guest memory encryption\n"); return false; } if (x2apic_mode || has_sev_es) { if (boot_cpu_data.cpuid_level < 0x0b) return false; if (check_extended_topology_leaf(0x0b) != 0) { pr_info("Disabling parallel bringup because CPUID 0xb looks untrustworthy\n"); return false; } if (has_sev_es) { pr_debug("Using SEV-ES CPUID 0xb for parallel CPU startup\n"); smpboot_control = STARTUP_APICID_SEV_ES; } else { pr_debug("Using CPUID 0xb for parallel CPU startup\n"); smpboot_control = STARTUP_APICID_CPUID_0B; } } else { /* Without X2APIC, what's in CPUID 0x01 should suffice. */ if (boot_cpu_data.cpuid_level < 0x01) return false; pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); smpboot_control = STARTUP_APICID_CPUID_01; } cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", native_cpu_kick, NULL); return true; }
On 08/03/2023 09:04, David Woodhouse wrote: > On Tue, 2023-03-07 at 16:55 -0600, Tom Lendacky wrote: >> On 3/7/23 16:27, David Woodhouse wrote: >>> On Tue, 2023-03-07 at 16:22 -0600, Tom Lendacky wrote: >>>> >>>> I did some Qemu/KVM testing. One thing I noticed is that on AMD, CPUID 0xB >>>> EAX will be non-zero only if SMT is enabled. So just booting some guests >>>> without CPU topology never did parallel booting ("smpboot: Disabling >>>> parallel bringup because CPUID 0xb looks untrustworthy"). I would imagine >>>> a bare-metal system that has diabled SMT will not do parallel booting, too >>>> (but I haven't had time to test that). >>> >>> Interesting, thanks. Should I change to checking for *both* EAX and EBX >>> being zero? That's what I did first, after reading only the Intel SDM. >>> But I changed to only EAX because the AMD doc only says that EAX will >>> be zero for unsupported leaves. >> >> From a baremetal perspective, I think that works. Rome was the first >> generation to support x2apic, and the PPR for Rome states that 0's are >> returned in all 4 registers for undefined function numbers. >> >> For virtualization, at least Qemu/KVM, that also looks to be a safe test. > > At Sean's suggestion, I've switched it to use the existing > check_extended_topology_leaf() which checks for EBX being non-zero, and > CH being 1 (SMT_TYPE). > > I also made it work even if the kernel isn't using x2apic mode (is that > even possible, or does SEV-ES require the MSR-based access anyway?) > > It just looked odd handling SEV-ES in the CPUID 0x0B path but not the > CPUID 0x01 case, and I certainly didn't want to implement the asm side > for handling CPUID 0x01 via the GHCB protocol. And this way I can pull > the check for CC_ATTR_GUEST_STATE_ENCRYPT up above. Which I've kept for > now for the reason described in the comment, but I won't die on that > hill. > > https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-v14 > > Looks like this: > > /* > * We can do 64-bit AP bringup in parallel if the CPU reports its APIC > * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC > * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too > * hard. > */ > static bool prepare_parallel_bringup(void) > { > bool has_sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) && > static_branch_unlikely(&sev_es_enable_key); > > if (IS_ENABLED(CONFIG_X86_32)) > return false; > > /* > * Encrypted guests other than SEV-ES (in the future) will need to > * implement an early way of finding the APIC ID, since they will > * presumably block direct CPUID too. Be kind to our future selves > * by warning here instead of just letting them break. Parallel > * startup doesn't have to be in the first round of enabling patches > * for any such technology. > */ > if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT) || !has_sev_es) { > pr_info("Disabling parallel bringup due to guest memory encryption\n"); > return false; I believe this is still going to enable parallel bringup for TDX? Looking at include/linux/cc_platform.h, it looks like CC_ATTR_GUEST_STATE_ENCRYPT is only set for SEV-ES and TDX guest with x2apic will go on in this function and enable parallel bringup if leaf 0xB is ok. I guess if the apic ID is OK for the TDX guest, then its fine, but just wanted to check if anyone has tested this on TDX guest? > } > > if (x2apic_mode || has_sev_es) { > if (boot_cpu_data.cpuid_level < 0x0b) > return false; > > if (check_extended_topology_leaf(0x0b) != 0) { > pr_info("Disabling parallel bringup because CPUID 0xb looks untrustworthy\n"); > return false; > } > > if (has_sev_es) { > pr_debug("Using SEV-ES CPUID 0xb for parallel CPU startup\n"); > smpboot_control = STARTUP_APICID_SEV_ES; > } else { > pr_debug("Using CPUID 0xb for parallel CPU startup\n"); > smpboot_control = STARTUP_APICID_CPUID_0B; > } > } else { > /* Without X2APIC, what's in CPUID 0x01 should suffice. */ > if (boot_cpu_data.cpuid_level < 0x01) > return false; > > pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); > smpboot_control = STARTUP_APICID_CPUID_01; > } > > cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", > native_cpu_kick, NULL); > return true; > } >
From: David Woodhouse > Sent: 08 March 2023 09:05 ... > https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-v14 > > Looks like this: > > /* > * We can do 64-bit AP bringup in parallel if the CPU reports its APIC > * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC > * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too > * hard. > */ > static bool prepare_parallel_bringup(void) > { > bool has_sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) && > static_branch_unlikely(&sev_es_enable_key); > > if (IS_ENABLED(CONFIG_X86_32)) > return false; > > /* > * Encrypted guests other than SEV-ES (in the future) will need to > * implement an early way of finding the APIC ID, since they will > * presumably block direct CPUID too. Be kind to our future selves > * by warning here instead of just letting them break. Parallel > * startup doesn't have to be in the first round of enabling patches > * for any such technology. > */ > if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT) || !has_sev_es) { > pr_info("Disabling parallel bringup due to guest memory encryption\n"); > return false; > } That looks wrong, won't has_sev_es almost always be false so it prints the message and returns? Maybe s/||/&&/ ? David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Wed, 2023-03-08 at 12:15 +0000, David Laight wrote: > > > /* > > * Encrypted guests other than SEV-ES (in the future) will need to > > * implement an early way of finding the APIC ID, since they will > > * presumably block direct CPUID too. Be kind to our future selves > > * by warning here instead of just letting them break. Parallel > > * startup doesn't have to be in the first round of enabling patches > > * for any such technology. > > */ > > if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT) || !has_sev_es) { > > pr_info("Disabling parallel bringup due to guest memory encryption\n"); > > return false; > > } > > That looks wrong, won't has_sev_es almost always be false > so it prints the message and returns? > Maybe s/||/&&/ ? D'oh! Fixed; thanks.
> static bool prepare_parallel_bringup(void) > { > bool has_sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) && > static_branch_unlikely(&sev_es_enable_key); sev_es_enable_key is only defined when CONFIG_AMD_MEM_ENCRYPT is enabled, so it gives a build error when AMD_MEM_ENCRYPT is disabled. maybe below is better? diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 282cca020777..e7df41436cfe 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1519,8 +1519,12 @@ void __init smp_prepare_cpus_common(void) */ static bool prepare_parallel_bringup(void) { - bool has_sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) && - static_branch_unlikely(&sev_es_enable_key); + bool has_sev_es; +#ifdef CONFIG_AMD_MEM_ENCRYPT + has_sev_es = static_branch_unlikely(&sev_es_enable_key); +#else + has_sev_es = 0; +#endif if (IS_ENABLED(CONFIG_X86_32)) return false;
On 3/8/23 03:04, David Woodhouse wrote: > On Tue, 2023-03-07 at 16:55 -0600, Tom Lendacky wrote: >> On 3/7/23 16:27, David Woodhouse wrote: >>> On Tue, 2023-03-07 at 16:22 -0600, Tom Lendacky wrote: >>>> >>>> I did some Qemu/KVM testing. One thing I noticed is that on AMD, CPUID 0xB >>>> EAX will be non-zero only if SMT is enabled. So just booting some guests >>>> without CPU topology never did parallel booting ("smpboot: Disabling >>>> parallel bringup because CPUID 0xb looks untrustworthy"). I would imagine >>>> a bare-metal system that has diabled SMT will not do parallel booting, too >>>> (but I haven't had time to test that). >>> >>> Interesting, thanks. Should I change to checking for *both* EAX and EBX >>> being zero? That's what I did first, after reading only the Intel SDM. >>> But I changed to only EAX because the AMD doc only says that EAX will >>> be zero for unsupported leaves. >> >> From a baremetal perspective, I think that works. Rome was the first >> generation to support x2apic, and the PPR for Rome states that 0's are >> returned in all 4 registers for undefined function numbers. >> >> For virtualization, at least Qemu/KVM, that also looks to be a safe test. > > At Sean's suggestion, I've switched it to use the existing > check_extended_topology_leaf() which checks for EBX being non-zero, and > CH being 1 (SMT_TYPE). > > I also made it work even if the kernel isn't using x2apic mode (is that > even possible, or does SEV-ES require the MSR-based access anyway?) > > It just looked odd handling SEV-ES in the CPUID 0x0B path but not the > CPUID 0x01 case, and I certainly didn't want to implement the asm side > for handling CPUID 0x01 via the GHCB protocol. And this way I can pull > the check for CC_ATTR_GUEST_STATE_ENCRYPT up above. Which I've kept for > now for the reason described in the comment, but I won't die on that > hill. You can boot an SEV-ES guest in apic mode, but that would be unusual, so I think this approach is fine. Thanks, Tom > > https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-v14 > > Looks like this: > > /* > * We can do 64-bit AP bringup in parallel if the CPU reports its APIC > * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC > * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too > * hard. > */ > static bool prepare_parallel_bringup(void) > { > bool has_sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) && > static_branch_unlikely(&sev_es_enable_key); > > if (IS_ENABLED(CONFIG_X86_32)) > return false; > > /* > * Encrypted guests other than SEV-ES (in the future) will need to > * implement an early way of finding the APIC ID, since they will > * presumably block direct CPUID too. Be kind to our future selves > * by warning here instead of just letting them break. Parallel > * startup doesn't have to be in the first round of enabling patches > * for any such technology. > */ > if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT) || !has_sev_es) { > pr_info("Disabling parallel bringup due to guest memory encryption\n"); > return false; > } > > if (x2apic_mode || has_sev_es) { > if (boot_cpu_data.cpuid_level < 0x0b) > return false; > > if (check_extended_topology_leaf(0x0b) != 0) { > pr_info("Disabling parallel bringup because CPUID 0xb looks untrustworthy\n"); > return false; > } > > if (has_sev_es) { > pr_debug("Using SEV-ES CPUID 0xb for parallel CPU startup\n"); > smpboot_control = STARTUP_APICID_SEV_ES; > } else { > pr_debug("Using CPUID 0xb for parallel CPU startup\n"); > smpboot_control = STARTUP_APICID_CPUID_0B; > } > } else { > /* Without X2APIC, what's in CPUID 0x01 should suffice. */ > if (boot_cpu_data.cpuid_level < 0x01) > return false; > > pr_debug("Using CPUID 0x1 for parallel CPU startup\n"); > smpboot_control = STARTUP_APICID_CPUID_01; > } > > cpuhp_setup_state_nocalls(CPUHP_BP_PARALLEL_DYN, "x86/cpu:kick", > native_cpu_kick, NULL); > return true; > } >