diff mbox series

[kvm-unit-tests,v2,01/10] x86: Move ap_init() to smp.c

Message ID 20220412173407.13637-2-varad.gautam@suse.com (mailing list archive)
State New, archived
Headers show
Series SMP Support for x86 UEFI Tests | expand

Commit Message

Varad Gautam April 12, 2022, 5:33 p.m. UTC
ap_init() copies the SIPI vector to lowmem, sends INIT/SIPI to APs
and waits on the APs to come up.

Port this routine to C from asm and move it to smp.c to allow sharing
this functionality between the EFI (-fPIC) and non-EFI builds.

Call ap_init() from the EFI setup path to reset the APs to a known
location.

Signed-off-by: Varad Gautam <varad.gautam@suse.com>
---
 lib/x86/setup.c      |  1 +
 lib/x86/smp.c        | 28 ++++++++++++++++++++++++++--
 lib/x86/smp.h        |  1 +
 x86/cstart64.S       | 20 ++------------------
 x86/efi/efistart64.S |  9 +++++++++
 5 files changed, 39 insertions(+), 20 deletions(-)

Comments

Sean Christopherson April 13, 2022, 3:16 p.m. UTC | #1
On Tue, Apr 12, 2022, Varad Gautam wrote:
> ap_init() copies the SIPI vector to lowmem, sends INIT/SIPI to APs
> and waits on the APs to come up.
> 
> Port this routine to C from asm and move it to smp.c to allow sharing
> this functionality between the EFI (-fPIC) and non-EFI builds.
> 
> Call ap_init() from the EFI setup path to reset the APs to a known
> location.
> 
> Signed-off-by: Varad Gautam <varad.gautam@suse.com>
> ---
>  lib/x86/setup.c      |  1 +
>  lib/x86/smp.c        | 28 ++++++++++++++++++++++++++--
>  lib/x86/smp.h        |  1 +
>  x86/cstart64.S       | 20 ++------------------

x86/cstart.S needs to join the party, without being kept in the loop KUT fails to
build on 32-bit targets.

	x86/vmexit.o x86/cstart.o lib/libcflat.a
lib/libcflat.a(smp.o): In function `ap_init':
/home/sean/go/src/kernel.org/kvm-unit-tests/lib/x86/smp.c:150: undefined reference to `sipi_end'
/home/sean/go/src/kernel.org/kvm-unit-tests/lib/x86/smp.c:150: undefined reference to `sipi_entry'
/home/sean/go/src/kernel.org/kvm-unit-tests/lib/x86/smp.c:155: undefined reference to `sipi_entry'
collect2: error: ld returned 1 exit status
/home/sean/go/src/kernel.org/kvm-unit-tests/x86/Makefile.common:65: recipe for target 'x86/vmexit.elf' failed

>  x86/efi/efistart64.S |  9 +++++++++
>  5 files changed, 39 insertions(+), 20 deletions(-)
> 
> diff --git a/lib/x86/setup.c b/lib/x86/setup.c
> index 2d63a44..86ba6de 100644
> --- a/lib/x86/setup.c
> +++ b/lib/x86/setup.c
> @@ -323,6 +323,7 @@ efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo)
>  	load_idt();
>  	mask_pic_interrupts();
>  	enable_apic();
> +	ap_init();
>  	enable_x2apic();
>  	smp_init();
>  	setup_page_table();
> diff --git a/lib/x86/smp.c b/lib/x86/smp.c
> index 683b25d..d7f5aba 100644
> --- a/lib/x86/smp.c
> +++ b/lib/x86/smp.c
> @@ -18,6 +18,9 @@ static volatile int ipi_done;
>  static volatile bool ipi_wait;
>  static int _cpu_count;
>  static atomic_t active_cpus;
> +extern u8 sipi_entry;
> +extern u8 sipi_end;
> +volatile unsigned cpu_online_count = 1;

Please no bare "unsigned".  But that's a moot point because it actually needs to
be a u16, the asm code does incw.  That's also sort of a moot point because there's
zero chance any of this will work with 65536+ vCPUs, but it's still odd to see.

There's also a declaration in x86/svm_tests.c that needs to get deleted.

	extern u16 cpu_online_count;

Ooh, actually, an even better idea.  Make cpu_online_count a proper atomic_t,
same as active_cpus.  Then the volatile goes away.  Ugh, but atomic_t uses a
volatile.  *sigh*  At least that's contained in one spot that we can fix all at
once if someone gets motivated in the future.

And then rather than leave the

	lock incw cpu_online_count

in asm code, move that to C code to, e.g. ap_call_in() or something (there's gotta
be a standard-ish name for this).  The shortlog can be something like "Move AP
wakeup and rendezvous to smp.c.

And as a bonus, add a printf() in the C helper (assuming that doesn't cause
explosions) to state which AP came online.  That would be super helpful for debug
when someone breaks SMP boot.

>  static __attribute__((used)) void ipi(void)
>  {
> @@ -114,8 +117,6 @@ void smp_init(void)
>  	int i;
>  	void ipi_entry(void);
>  
> -	_cpu_count = fwcfg_get_nb_cpus();
> -
>  	setup_idt();
>  	init_apic_map();
>  	set_idt_entry(IPI_VECTOR, ipi_entry, 0);
> @@ -142,3 +143,26 @@ void smp_reset_apic(void)
>  
>  	atomic_inc(&active_cpus);
>  }
> +
> +void ap_init(void)
> +{
> +	u8 *dst_addr = 0;
> +	size_t sipi_sz = (&sipi_end - &sipi_entry) + 1;
> +
> +	asm volatile("cld");
> +
> +	/* Relocate SIPI vector to dst_addr so it can run in 16-bit mode. */
> +	memcpy(dst_addr, &sipi_entry, sipi_sz);
> +
> +	/* INIT */
> +	apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 0);
> +
> +	/* SIPI */
> +	apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP, 0);
> +
> +	_cpu_count = fwcfg_get_nb_cpus();
> +

Similar to above, I would be in favor of opportunitically adding a printf to state
that the BSP is about to wait for N number of APs to come online, 
.
> +	while (_cpu_count != cpu_online_count) {
> +		;
> +	}

Curly braces technically aren't needed.

> +}
> diff --git a/lib/x86/smp.h b/lib/x86/smp.h
> index bd303c2..9c92853 100644
Sean Christopherson April 13, 2022, 6:44 p.m. UTC | #2
On Tue, Apr 12, 2022, Varad Gautam wrote:
> @@ -142,3 +143,26 @@ void smp_reset_apic(void)
>  
>  	atomic_inc(&active_cpus);
>  }
> +
> +void ap_init(void)
> +{
> +	u8 *dst_addr = 0;

Oof, this is subtle.  I didn't realize until patch 7 that this is actually using
va=pa=0 for the trampoline.

Does anything actually prevent KUT from allocating pa=0?  Ah, looks like __setup_vm()
excludes the lower 1mb.

'0' should be a #define somewhere, e.g. EFI_RM_TRAMPOLINE_PHYS_ADDR, probably in
lib/alloc_page.h next to AREA_ANY_NUMBER with a comment tying the two together.
And then patch 7 can (hopefully without too much pain) use the define instead of
open coding the reference to PA=0, which is really confusing and unnecessarily
fragile.

E.g. instead of

	/* Retrieve relocated gdt32_descr address at (PAGE_SIZE - 2). */
	mov (PAGE_SIZE - 2), %ebx

hopefully it can be

	mov (EFI_RM_TRAMPOLINE_PHYS_ADDR + PAGE_SIZE - 2), %ebx

> +	size_t sipi_sz = (&sipi_end - &sipi_entry) + 1;

Nit, maybe sipi_trampoline_size?

> +	asm volatile("cld");
> +
> +	/* Relocate SIPI vector to dst_addr so it can run in 16-bit mode. */
> +	memcpy(dst_addr, &sipi_entry, sipi_sz);

A more descriptive name than dst_addr would help, and I'm pretty sure it can be
a void *.  Maybe?

	void *rm_trampoline = EFI_RM_TRAMPOLINE_PHYS_ADDR?

And rather than add the assert+memset in patch 7, do that here.  Oh, and fill
the page with 0xcc, i.e. int3, instead of 0, so that if an AP wanders into the
weeds, it gets a fault and shutdown.  All zeros decodes to ADD [rax], rax (or maybe
the reverse?), i.e. isn't guaranteed to fail right away.

	assert(sipi_trampoline_size < PAGE_SIZE);

	/* Comment goes here about filling with 0xcc. */
	memset(rm_trampoline, 0xcc, PAGE_SIZE);
	memcpy(rm_trampoline, &sipi_entry, sipi_trampoline_size);
Sean Christopherson April 13, 2022, 6:55 p.m. UTC | #3
On Wed, Apr 13, 2022, Sean Christopherson wrote:
> On Tue, Apr 12, 2022, Varad Gautam wrote:
> > @@ -142,3 +143,26 @@ void smp_reset_apic(void)
> >  
> >  	atomic_inc(&active_cpus);
> >  }
> > +
> > +void ap_init(void)
> > +{
> > +	u8 *dst_addr = 0;
> 
> Oof, this is subtle.  I didn't realize until patch 7 that this is actually using
> va=pa=0 for the trampoline.
> 
> Does anything actually prevent KUT from allocating pa=0?  Ah, looks like __setup_vm()
> excludes the lower 1mb.
> 
> '0' should be a #define somewhere, e.g. EFI_RM_TRAMPOLINE_PHYS_ADDR, probably in
> lib/alloc_page.h next to AREA_ANY_NUMBER with a comment tying the two together.
> And then patch 7 can (hopefully without too much pain) use the define instead of
> open coding the reference to PA=0, which is really confusing and unnecessarily
> fragile.
> 
> E.g. instead of
> 
> 	/* Retrieve relocated gdt32_descr address at (PAGE_SIZE - 2). */
> 	mov (PAGE_SIZE - 2), %ebx
> 
> hopefully it can be
> 
> 	mov (EFI_RM_TRAMPOLINE_PHYS_ADDR + PAGE_SIZE - 2), %ebx
> 
> > +	size_t sipi_sz = (&sipi_end - &sipi_entry) + 1;
> 
> Nit, maybe sipi_trampoline_size?

Ooooh, and I finally realized in patch 7 that the "sipi" area encompasses the GDT
as well.  I couldn't figure out how the GDT was getting relocated :-)

Definitely needs a different name.  How about efi_rm_trampoline_start,
efi_rm_trampoline_end, and efi_rm_trampoline_size?  The "real mode" part is
kinda misleading, but on the other hand it's also the main reason why this needs
to be relocated to super low memory.  And add a comment 

That will help make it a little more obvious that there's more than just the SIPI
code that is getting manually relocated.

It's probably still worth having an explicit sipi_entry label, with a comment to
document that it absolutely needs to be at the start of the trampoline so that
the SIPI vector sends APs to the right location.
Sean Christopherson April 13, 2022, 6:59 p.m. UTC | #4
On Wed, Apr 13, 2022, Sean Christopherson wrote:
> On Wed, Apr 13, 2022, Sean Christopherson wrote:
> > On Tue, Apr 12, 2022, Varad Gautam wrote:
> > > @@ -142,3 +143,26 @@ void smp_reset_apic(void)
> > >  
> > >  	atomic_inc(&active_cpus);
> > >  }
> > > +
> > > +void ap_init(void)

Sorry for chaining these, I keep understanding more things as I read through the
end of the series.  Hopefully this is the last one.

Can this be named setup_efi_rm_trampoline()?  Or whatever best matches the name
we decide on.  I keep thinking APs bounce through this to do their initialization,
but it's the BSP doing setup to prep waking the APs.
Sean Christopherson April 13, 2022, 7:01 p.m. UTC | #5
On Wed, Apr 13, 2022, Sean Christopherson wrote:
> On Wed, Apr 13, 2022, Sean Christopherson wrote:
> > On Wed, Apr 13, 2022, Sean Christopherson wrote:
> > > On Tue, Apr 12, 2022, Varad Gautam wrote:
> > > > @@ -142,3 +143,26 @@ void smp_reset_apic(void)
> > > >  
> > > >  	atomic_inc(&active_cpus);
> > > >  }
> > > > +
> > > > +void ap_init(void)
> 
> Sorry for chaining these, I keep understanding more things as I read through the
> end of the series.  Hopefully this is the last one.
> 
> Can this be named setup_efi_rm_trampoline()?  Or whatever best matches the name
> we decide on.  I keep thinking APs bounce through this to do their initialization,
> but it's the BSP doing setup to prep waking the APs.

Well that didn't take long.  Just realized this isn't unique to EFI, and it also
does the waking.  Maybe wake_aps()?  That makes smp_init() even more confusing
(I keep thinking that it wakes APs...), but IMO smp_init() is the one that needs
a new name.
Sean Christopherson April 13, 2022, 7:04 p.m. UTC | #6
On Wed, Apr 13, 2022, Sean Christopherson wrote:
> On Wed, Apr 13, 2022, Sean Christopherson wrote:
> > On Wed, Apr 13, 2022, Sean Christopherson wrote:
> > > On Wed, Apr 13, 2022, Sean Christopherson wrote:
> > > > On Tue, Apr 12, 2022, Varad Gautam wrote:
> > > > > @@ -142,3 +143,26 @@ void smp_reset_apic(void)
> > > > >  
> > > > >  	atomic_inc(&active_cpus);
> > > > >  }
> > > > > +
> > > > > +void ap_init(void)
> > 
> > Sorry for chaining these, I keep understanding more things as I read through the
> > end of the series.  Hopefully this is the last one.
> > 
> > Can this be named setup_efi_rm_trampoline()?  Or whatever best matches the name
> > we decide on.  I keep thinking APs bounce through this to do their initialization,
> > but it's the BSP doing setup to prep waking the APs.
> 
> Well that didn't take long.  Just realized this isn't unique to EFI, and it also
> does the waking.  Maybe wake_aps()?  That makes smp_init() even more confusing
> (I keep thinking that it wakes APs...), but IMO smp_init() is the one that needs
> a new name.

*sigh*  Ignore the last complaint about smp_init(), it does do SMP initialization
by sending IPIs.  We should really change that, but that's a future problem.
diff mbox series

Patch

diff --git a/lib/x86/setup.c b/lib/x86/setup.c
index 2d63a44..86ba6de 100644
--- a/lib/x86/setup.c
+++ b/lib/x86/setup.c
@@ -323,6 +323,7 @@  efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo)
 	load_idt();
 	mask_pic_interrupts();
 	enable_apic();
+	ap_init();
 	enable_x2apic();
 	smp_init();
 	setup_page_table();
diff --git a/lib/x86/smp.c b/lib/x86/smp.c
index 683b25d..d7f5aba 100644
--- a/lib/x86/smp.c
+++ b/lib/x86/smp.c
@@ -18,6 +18,9 @@  static volatile int ipi_done;
 static volatile bool ipi_wait;
 static int _cpu_count;
 static atomic_t active_cpus;
+extern u8 sipi_entry;
+extern u8 sipi_end;
+volatile unsigned cpu_online_count = 1;
 
 static __attribute__((used)) void ipi(void)
 {
@@ -114,8 +117,6 @@  void smp_init(void)
 	int i;
 	void ipi_entry(void);
 
-	_cpu_count = fwcfg_get_nb_cpus();
-
 	setup_idt();
 	init_apic_map();
 	set_idt_entry(IPI_VECTOR, ipi_entry, 0);
@@ -142,3 +143,26 @@  void smp_reset_apic(void)
 
 	atomic_inc(&active_cpus);
 }
+
+void ap_init(void)
+{
+	u8 *dst_addr = 0;
+	size_t sipi_sz = (&sipi_end - &sipi_entry) + 1;
+
+	asm volatile("cld");
+
+	/* Relocate SIPI vector to dst_addr so it can run in 16-bit mode. */
+	memcpy(dst_addr, &sipi_entry, sipi_sz);
+
+	/* INIT */
+	apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 0);
+
+	/* SIPI */
+	apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP, 0);
+
+	_cpu_count = fwcfg_get_nb_cpus();
+
+	while (_cpu_count != cpu_online_count) {
+		;
+	}
+}
diff --git a/lib/x86/smp.h b/lib/x86/smp.h
index bd303c2..9c92853 100644
--- a/lib/x86/smp.h
+++ b/lib/x86/smp.h
@@ -78,5 +78,6 @@  void on_cpu(int cpu, void (*function)(void *data), void *data);
 void on_cpu_async(int cpu, void (*function)(void *data), void *data);
 void on_cpus(void (*function)(void *data), void *data);
 void smp_reset_apic(void);
+void ap_init(void);
 
 #endif
diff --git a/x86/cstart64.S b/x86/cstart64.S
index 7272452..f371d06 100644
--- a/x86/cstart64.S
+++ b/x86/cstart64.S
@@ -157,6 +157,7 @@  gdt32:
 gdt32_end:
 
 .code16
+.globl sipi_entry
 sipi_entry:
 	mov %cr0, %eax
 	or $1, %eax
@@ -168,6 +169,7 @@  gdt32_descr:
 	.word gdt32_end - gdt32 - 1
 	.long gdt32
 
+.globl sipi_end
 sipi_end:
 
 .code32
@@ -240,21 +242,3 @@  lvl5:
 
 online_cpus:
 	.fill (max_cpus + 7) / 8, 1, 0
-
-ap_init:
-	cld
-	lea sipi_entry, %rsi
-	xor %rdi, %rdi
-	mov $(sipi_end - sipi_entry), %rcx
-	rep movsb
-	mov $APIC_DEFAULT_PHYS_BASE, %eax
-	movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT), APIC_ICR(%rax)
-	movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP), APIC_ICR(%rax)
-	call fwcfg_get_nb_cpus
-1:	pause
-	cmpw %ax, cpu_online_count
-	jne 1b
-	ret
-
-.align 2
-cpu_online_count:	.word 1
diff --git a/x86/efi/efistart64.S b/x86/efi/efistart64.S
index 017abba..0425153 100644
--- a/x86/efi/efistart64.S
+++ b/x86/efi/efistart64.S
@@ -57,3 +57,12 @@  load_gdt_tss:
 	pushq $0x08 /* 2nd entry in gdt64: 64-bit code segment */
 	pushq %rdi
 	lretq
+
+.code16
+
+.globl sipi_entry
+sipi_entry:
+	jmp sipi_entry
+
+.globl sipi_end
+sipi_end: