Message ID | bd23a05ea25b2f431bb0655ca6402073f9cf49b8.1715102098.git.alejandro.vallejo@cloud.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | x86: Expose consistent topology to guests | expand |
On 08/05/2024 1:39 pm, Alejandro Vallejo wrote: > Removes a needless assembly entry point and simplifies the codebase by allowing > hvmloader to wake APs it doesn't know the APIC ID of. > > Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> This is basically independent of the rest of the series, and it would be good to pull it in separately. A few notes. The commit message ought to note that this has a side effect of removing an LMSW instruction which has fastpath at all on AMD CPUs, and requires full emulation, and it gets rid of 13 vLAPIC emulations when 3 hypercalls would do. The point being that this is borderline backport material, although it does depend on the 32 vCPU bugfix. > --- > v2: > * New patch. Replaces adding cpu policy to hvmloader in v1. > --- > tools/firmware/hvmloader/smp.c | 111 +++++++++++++-------------------- > 1 file changed, 44 insertions(+), 67 deletions(-) > > diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c > index 082b17f13818..a668f15d7e1f 100644 > --- a/tools/firmware/hvmloader/smp.c > +++ b/tools/firmware/hvmloader/smp.c > @@ -22,88 +22,68 @@ > #include "util.h" > #include "config.h" > #include "apic_regs.h" > +#include "hypercall.h" > > -#define AP_BOOT_EIP 0x1000 > -extern char ap_boot_start[], ap_boot_end[]; > +#include <xen/asm/x86-defns.h> > +#include <xen/hvm/hvm_vcpu.h> > + > +#include <xen/vcpu.h> > > static int ap_callin, ap_cpuid; > > -asm ( > - " .text \n" > - " .code16 \n" > - "ap_boot_start: .code16 \n" > - " mov %cs,%ax \n" > - " mov %ax,%ds \n" > - " lgdt gdt_desr-ap_boot_start\n" > - " xor %ax, %ax \n" > - " inc %ax \n" > - " lmsw %ax \n" > - " ljmpl $0x08,$1f \n" > - "gdt_desr: \n" > - " .word gdt_end - gdt - 1 \n" > - " .long gdt \n" > - "ap_boot_end: .code32 \n" > - "1: mov $0x10,%eax \n" > - " mov %eax,%ds \n" > - " mov %eax,%es \n" > - " mov %eax,%ss \n" > - " movl $stack_top,%esp \n" > - " movl %esp,%ebp \n" > - " call ap_start \n" > - "1: hlt \n" > - " jmp 1b \n" > - " \n" > - " .align 8 \n" > - "gdt: \n" > - " .quad 0x0000000000000000 \n" > - " .quad 0x00cf9a000000ffff \n" /* 0x08: Flat code segment */ > - " .quad 0x00cf92000000ffff \n" /* 0x10: Flat data segment */ > - "gdt_end: \n" > - " \n" > - " .bss \n" > - " .align 8 \n" > - "stack: \n" > - " .skip 0x4000 \n" > - "stack_top: \n" > - " .text \n" > - ); > - > -void ap_start(void); /* non-static avoids unused-function compiler warning */ > -/*static*/ void ap_start(void) > +static void ap_start(void) > { > printf(" - CPU%d ... ", ap_cpuid); > cacheattr_init(); > printf("done.\n"); > + > + if ( !ap_cpuid ) > + return; > + > wmb(); > ap_callin = 1; /* After this point, the BSP will shut us down. */ > -} > > -static void lapic_wait_ready(void) > -{ > - while ( lapic_read(APIC_ICR) & APIC_ICR_BUSY ) > - cpu_relax(); > + while ( 1 ) For this, we tend to favour "for ( ;; )". > + asm volatile ( "hlt" ); > } > > static void boot_cpu(unsigned int cpu) > { > - unsigned int icr2 = SET_APIC_DEST_FIELD(LAPIC_ID(cpu)); > + static uint8_t ap_stack[4 * PAGE_SIZE] __attribute__ ((aligned (16))); I know you're just copying what was there, but 4 pages is stupidly large for something that needs about 4 stack slots. 4K is absolutely plenty. > + static struct vcpu_hvm_context ap; > > /* Initialise shared variables. */ > ap_cpuid = cpu; > - ap_callin = 0; > wmb(); > > - /* Wake up the secondary processor: INIT-SIPI-SIPI... */ > - lapic_wait_ready(); > - lapic_write(APIC_ICR2, icr2); > - lapic_write(APIC_ICR, APIC_DM_INIT); > - lapic_wait_ready(); > - lapic_write(APIC_ICR2, icr2); > - lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12)); > - lapic_wait_ready(); > - lapic_write(APIC_ICR2, icr2); > - lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12)); > - lapic_wait_ready(); > + /* Wake up the secondary processor */ > + ap = (struct vcpu_hvm_context) { > + .mode = VCPU_HVM_MODE_32B, > + .cpu_regs.x86_32 = { > + .eip = (uint32_t)ap_start, > + .esp = (uint32_t)ap_stack + ARRAY_SIZE(ap_stack), Not that it really matters, but these want to be unsigned long casts. > + > + /* Protected mode with MMU off */ > + .cr0 = X86_CR0_PE, > + > + /* Prepopulate the GDT */ /* 32bit Flat Mode */ This isn't the GDT; it's the segment registers, but "Flat Mode" is the more meaningful term to use. I'm happy to fix all on commit. ~Andrew
On 08/05/2024 20:13, Andrew Cooper wrote: > On 08/05/2024 1:39 pm, Alejandro Vallejo wrote: >> Removes a needless assembly entry point and simplifies the codebase by allowing >> hvmloader to wake APs it doesn't know the APIC ID of. >> >> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> > > This is basically independent of the rest of the series, and it would be > good to pull it in separately. A few notes. > > The commit message ought to note that this has a side effect of removing > an LMSW instruction which has fastpath at all on AMD CPUs, and requires > full emulation, and it gets rid of 13 vLAPIC emulations when 3 > hypercalls would do. > > The point being that this is borderline backport material, although it > does depend on the 32 vCPU bugfix. > >> --- >> v2: >> * New patch. Replaces adding cpu policy to hvmloader in v1. >> --- >> tools/firmware/hvmloader/smp.c | 111 +++++++++++++-------------------- >> 1 file changed, 44 insertions(+), 67 deletions(-) >> >> diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c >> index 082b17f13818..a668f15d7e1f 100644 >> --- a/tools/firmware/hvmloader/smp.c >> +++ b/tools/firmware/hvmloader/smp.c >> @@ -22,88 +22,68 @@ >> #include "util.h" >> #include "config.h" >> #include "apic_regs.h" >> +#include "hypercall.h" >> >> -#define AP_BOOT_EIP 0x1000 >> -extern char ap_boot_start[], ap_boot_end[]; >> +#include <xen/asm/x86-defns.h> >> +#include <xen/hvm/hvm_vcpu.h> >> + >> +#include <xen/vcpu.h> >> >> static int ap_callin, ap_cpuid; >> >> -asm ( >> - " .text \n" >> - " .code16 \n" >> - "ap_boot_start: .code16 \n" >> - " mov %cs,%ax \n" >> - " mov %ax,%ds \n" >> - " lgdt gdt_desr-ap_boot_start\n" >> - " xor %ax, %ax \n" >> - " inc %ax \n" >> - " lmsw %ax \n" >> - " ljmpl $0x08,$1f \n" >> - "gdt_desr: \n" >> - " .word gdt_end - gdt - 1 \n" >> - " .long gdt \n" >> - "ap_boot_end: .code32 \n" >> - "1: mov $0x10,%eax \n" >> - " mov %eax,%ds \n" >> - " mov %eax,%es \n" >> - " mov %eax,%ss \n" >> - " movl $stack_top,%esp \n" >> - " movl %esp,%ebp \n" >> - " call ap_start \n" >> - "1: hlt \n" >> - " jmp 1b \n" >> - " \n" >> - " .align 8 \n" >> - "gdt: \n" >> - " .quad 0x0000000000000000 \n" >> - " .quad 0x00cf9a000000ffff \n" /* 0x08: Flat code segment */ >> - " .quad 0x00cf92000000ffff \n" /* 0x10: Flat data segment */ >> - "gdt_end: \n" >> - " \n" >> - " .bss \n" >> - " .align 8 \n" >> - "stack: \n" >> - " .skip 0x4000 \n" >> - "stack_top: \n" >> - " .text \n" >> - ); >> - >> -void ap_start(void); /* non-static avoids unused-function compiler warning */ >> -/*static*/ void ap_start(void) >> +static void ap_start(void) >> { >> printf(" - CPU%d ... ", ap_cpuid); >> cacheattr_init(); >> printf("done.\n"); >> + >> + if ( !ap_cpuid ) >> + return; >> + >> wmb(); >> ap_callin = 1; > > /* After this point, the BSP will shut us down. */ > >> -} >> >> -static void lapic_wait_ready(void) >> -{ >> - while ( lapic_read(APIC_ICR) & APIC_ICR_BUSY ) >> - cpu_relax(); >> + while ( 1 ) > > For this, we tend to favour "for ( ;; )". > >> + asm volatile ( "hlt" ); >> } >> >> static void boot_cpu(unsigned int cpu) >> { >> - unsigned int icr2 = SET_APIC_DEST_FIELD(LAPIC_ID(cpu)); >> + static uint8_t ap_stack[4 * PAGE_SIZE] __attribute__ ((aligned (16))); > > I know you're just copying what was there, but 4 pages is stupidly large > for something that needs about 4 stack slots. > > 4K is absolutely plenty. > >> + static struct vcpu_hvm_context ap; >> >> /* Initialise shared variables. */ >> ap_cpuid = cpu; >> - ap_callin = 0; >> wmb(); >> >> - /* Wake up the secondary processor: INIT-SIPI-SIPI... */ >> - lapic_wait_ready(); >> - lapic_write(APIC_ICR2, icr2); >> - lapic_write(APIC_ICR, APIC_DM_INIT); >> - lapic_wait_ready(); >> - lapic_write(APIC_ICR2, icr2); >> - lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12)); >> - lapic_wait_ready(); >> - lapic_write(APIC_ICR2, icr2); >> - lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12)); >> - lapic_wait_ready(); >> + /* Wake up the secondary processor */ >> + ap = (struct vcpu_hvm_context) { >> + .mode = VCPU_HVM_MODE_32B, >> + .cpu_regs.x86_32 = { >> + .eip = (uint32_t)ap_start, >> + .esp = (uint32_t)ap_stack + ARRAY_SIZE(ap_stack), > > Not that it really matters, but these want to be unsigned long casts. > >> + >> + /* Protected mode with MMU off */ >> + .cr0 = X86_CR0_PE, >> + >> + /* Prepopulate the GDT */ > > /* 32bit Flat Mode */ > > This isn't the GDT; it's the segment registers, but "Flat Mode" is the > more meaningful term to use. > > > I'm happy to fix all on commit. > > ~Andrew All sound ok to me. Cheers, Alejandro
diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c index 082b17f13818..a668f15d7e1f 100644 --- a/tools/firmware/hvmloader/smp.c +++ b/tools/firmware/hvmloader/smp.c @@ -22,88 +22,68 @@ #include "util.h" #include "config.h" #include "apic_regs.h" +#include "hypercall.h" -#define AP_BOOT_EIP 0x1000 -extern char ap_boot_start[], ap_boot_end[]; +#include <xen/asm/x86-defns.h> +#include <xen/hvm/hvm_vcpu.h> + +#include <xen/vcpu.h> static int ap_callin, ap_cpuid; -asm ( - " .text \n" - " .code16 \n" - "ap_boot_start: .code16 \n" - " mov %cs,%ax \n" - " mov %ax,%ds \n" - " lgdt gdt_desr-ap_boot_start\n" - " xor %ax, %ax \n" - " inc %ax \n" - " lmsw %ax \n" - " ljmpl $0x08,$1f \n" - "gdt_desr: \n" - " .word gdt_end - gdt - 1 \n" - " .long gdt \n" - "ap_boot_end: .code32 \n" - "1: mov $0x10,%eax \n" - " mov %eax,%ds \n" - " mov %eax,%es \n" - " mov %eax,%ss \n" - " movl $stack_top,%esp \n" - " movl %esp,%ebp \n" - " call ap_start \n" - "1: hlt \n" - " jmp 1b \n" - " \n" - " .align 8 \n" - "gdt: \n" - " .quad 0x0000000000000000 \n" - " .quad 0x00cf9a000000ffff \n" /* 0x08: Flat code segment */ - " .quad 0x00cf92000000ffff \n" /* 0x10: Flat data segment */ - "gdt_end: \n" - " \n" - " .bss \n" - " .align 8 \n" - "stack: \n" - " .skip 0x4000 \n" - "stack_top: \n" - " .text \n" - ); - -void ap_start(void); /* non-static avoids unused-function compiler warning */ -/*static*/ void ap_start(void) +static void ap_start(void) { printf(" - CPU%d ... ", ap_cpuid); cacheattr_init(); printf("done.\n"); + + if ( !ap_cpuid ) + return; + wmb(); ap_callin = 1; -} -static void lapic_wait_ready(void) -{ - while ( lapic_read(APIC_ICR) & APIC_ICR_BUSY ) - cpu_relax(); + while ( 1 ) + asm volatile ( "hlt" ); } static void boot_cpu(unsigned int cpu) { - unsigned int icr2 = SET_APIC_DEST_FIELD(LAPIC_ID(cpu)); + static uint8_t ap_stack[4 * PAGE_SIZE] __attribute__ ((aligned (16))); + static struct vcpu_hvm_context ap; /* Initialise shared variables. */ ap_cpuid = cpu; - ap_callin = 0; wmb(); - /* Wake up the secondary processor: INIT-SIPI-SIPI... */ - lapic_wait_ready(); - lapic_write(APIC_ICR2, icr2); - lapic_write(APIC_ICR, APIC_DM_INIT); - lapic_wait_ready(); - lapic_write(APIC_ICR2, icr2); - lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12)); - lapic_wait_ready(); - lapic_write(APIC_ICR2, icr2); - lapic_write(APIC_ICR, APIC_DM_STARTUP | (AP_BOOT_EIP >> 12)); - lapic_wait_ready(); + /* Wake up the secondary processor */ + ap = (struct vcpu_hvm_context) { + .mode = VCPU_HVM_MODE_32B, + .cpu_regs.x86_32 = { + .eip = (uint32_t)ap_start, + .esp = (uint32_t)ap_stack + ARRAY_SIZE(ap_stack), + + /* Protected mode with MMU off */ + .cr0 = X86_CR0_PE, + + /* Prepopulate the GDT */ + .cs_limit = -1U, + .ds_limit = -1U, + .ss_limit = -1U, + .es_limit = -1U, + .tr_limit = 0x67, + .cs_ar = 0xc9b, + .ds_ar = 0xc93, + .es_ar = 0xc93, + .ss_ar = 0xc93, + .tr_ar = 0x8b, + }, + }; + + if ( hypercall_vcpu_op(VCPUOP_initialise, cpu, &ap) ) + BUG(); + if ( hypercall_vcpu_op(VCPUOP_up, cpu, NULL) ) + BUG(); /* * Wait for the secondary processor to complete initialisation. @@ -113,17 +93,14 @@ static void boot_cpu(unsigned int cpu) cpu_relax(); /* Take the secondary processor offline. */ - lapic_write(APIC_ICR2, icr2); - lapic_write(APIC_ICR, APIC_DM_INIT); - lapic_wait_ready(); + if ( hypercall_vcpu_op(VCPUOP_down, cpu, NULL) ) + BUG(); } void smp_initialise(void) { unsigned int i, nr_cpus = hvm_info->nr_vcpus; - memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start); - printf("Multiprocessor initialisation:\n"); ap_start(); for ( i = 1; i < nr_cpus; i++ )
Removes a needless assembly entry point and simplifies the codebase by allowing hvmloader to wake APs it doesn't know the APIC ID of. Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> --- v2: * New patch. Replaces adding cpu policy to hvmloader in v1. --- tools/firmware/hvmloader/smp.c | 111 +++++++++++++-------------------- 1 file changed, 44 insertions(+), 67 deletions(-)