diff mbox series

[kvm-unit-tests,v2,09/17] x86 UEFI: Set up page tables

Message ID 20210827031222.2778522-10-zixuanwang@google.com (mailing list archive)
State New, archived
Headers show
Series x86_64 UEFI and AMD SEV/SEV-ES support | expand

Commit Message

Zixuan Wang Aug. 27, 2021, 3:12 a.m. UTC
UEFI sets up page tables before executing EFI application binaries.
These page tables do not allow user space code to access kernel space
memory. But the `x86/syscall.c` test case places a user space function
`syscall_tf_user32` inside kernel space memory. When using UEFI page
tables, fetching this kernel memory from user space triggers a #PF,
which is not expected by this test case.

KVM-Unit-Tests defines page tables that allow such behavior. So the
solution to this problem is to load KVM-Unit-Tests' page tables:

   1. Copy the page table definition from `x86/cstart64.S`
   2. Update page table entries with runtime memory addresses
   3. Update CR3 register with the new page table root address

With this commit, `x86/syscall.c` can run under UEFI and generates the
same output as under SeaBIOS, using the following command:

   ./x86/efi/run ./x86/syscall.efi --cpu Opteron_G1,vendor=AuthenticAMD

Signed-off-by: Zixuan Wang <zixuanwang@google.com>
---
 lib/x86/asm/page.h   |  1 +
 lib/x86/asm/setup.h  |  3 +++
 lib/x86/setup.c      | 57 ++++++++++++++++++++++++++++++++++++++++++++
 x86/efi/efistart64.S | 21 ++++++++++++++++
 4 files changed, 82 insertions(+)

Comments

Paolo Bonzini Sept. 20, 2021, 3:43 p.m. UTC | #1
On 27/08/21 05:12, Zixuan Wang wrote:
> +static void setup_page_table(void)

It would also be nice if cstart64.S reused setup_page_table, but unlike 
GDT/IDT/TSS I guess it's not super-necessary.

Paolo
Zixuan Wang Sept. 21, 2021, 4:31 a.m. UTC | #2
On Mon, Sep 20, 2021 at 6:26 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 27/08/21 05:12, Zixuan Wang wrote:
> > +static void setup_page_table(void)
>
> It would also be nice if cstart64.S reused setup_page_table, but unlike
> GDT/IDT/TSS I guess it's not super-necessary.
>
> Paolo
>

I can update this in the next version. I can also move the page table
definitions to a C file, maybe lib/x86/desc.c or lib/x86/vm.c? I'm not
sure which file is better.

Best regards,
Zixuan
diff mbox series

Patch

diff --git a/lib/x86/asm/page.h b/lib/x86/asm/page.h
index fc14160..f6f740b 100644
--- a/lib/x86/asm/page.h
+++ b/lib/x86/asm/page.h
@@ -31,6 +31,7 @@  typedef unsigned long pgd_t;
 #define PT_ACCESSED_MASK	(1ull << 5)
 #define PT_DIRTY_MASK		(1ull << 6)
 #define PT_PAGE_SIZE_MASK	(1ull << 7)
+#define PT_GLOBAL_MASK		(1ull << 8)
 #define PT64_NX_MASK		(1ull << 63)
 #define PT_ADDR_MASK		GENMASK_ULL(51, 12)
 
diff --git a/lib/x86/asm/setup.h b/lib/x86/asm/setup.h
index 40fd963..16bad0f 100644
--- a/lib/x86/asm/setup.h
+++ b/lib/x86/asm/setup.h
@@ -4,7 +4,9 @@ 
 #ifdef TARGET_EFI
 #include "x86/acpi.h"
 #include "x86/apic.h"
+#include "x86/processor.h"
 #include "x86/smp.h"
+#include "asm/page.h"
 #include "efi.h"
 
 /*
@@ -22,6 +24,7 @@  typedef struct {
 void setup_efi_bootinfo(efi_bootinfo_t *efi_bootinfo);
 void setup_efi(efi_bootinfo_t *efi_bootinfo);
 efi_status_t setup_efi_pre_boot(unsigned long *mapkey, efi_bootinfo_t *efi_bootinfo);
+void setup_5level_page_table(void);
 #endif /* TARGET_EFI */
 
 #endif /* _X86_ASM_SETUP_H_ */
diff --git a/lib/x86/setup.c b/lib/x86/setup.c
index 1ddfb8c..03598fe 100644
--- a/lib/x86/setup.c
+++ b/lib/x86/setup.c
@@ -218,6 +218,62 @@  efi_status_t setup_efi_pre_boot(unsigned long *mapkey, efi_bootinfo_t *efi_booti
 	return EFI_SUCCESS;
 }
 
+/* Page-table pages defined in cstart64.S or efistart64.S; used below via their addresses */
+extern phys_addr_t ptl5;
+extern phys_addr_t ptl4;
+extern phys_addr_t ptl3;
+extern phys_addr_t ptl2;
+
+/* Identity-map the low 4GB with user-accessible 2MB pages, then load it via CR3. */
+static void setup_page_table(void)
+{
+	pgd_t *curr_pt;
+	phys_addr_t flags;
+	int i;
+
+	/* Non-leaf entries: present, writable and reachable from user mode */
+	flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
+	/* Level 5: a single entry pointing at the level-4 table */
+	curr_pt = (pgd_t *)&ptl5;
+	curr_pt[0] = ((phys_addr_t)&ptl4) | flags;
+	/* Level 4: a single entry pointing at the level-3 table */
+	curr_pt = (pgd_t *)&ptl4;
+	curr_pt[0] = ((phys_addr_t)&ptl3) | flags;
+	/* Level 3: four entries, one for each page of ptl2 */
+	curr_pt = (pgd_t *)&ptl3;
+	for (i = 0; i < 4; i++) {
+		curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags;
+	}
+	/* Level 2: 4 * 512 large-page leaves; widen i first, (i << 21) overflows int at i >= 1024 */
+	curr_pt = (pgd_t *)&ptl2;
+	flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK;
+	for (i = 0; i < 4 * 512; i++) {
+		curr_pt[i] = ((phys_addr_t)i << 21) | flags;
+	}
+
+	/* Load 4-level page table */
+	write_cr3((ulong)&ptl4);
+}
+
+/* Point CR3 at ptl5 and set CR4.LA57, unless CR4.LA57 is already set. */
+void setup_5level_page_table(void)
+{
+	/* Nothing to do if 5-level paging is already enabled */
+	if (read_cr4() & X86_CR4_LA57) {
+		return;
+	}
+
+	/* Clear CR4.PCIDE before paging is turned off */
+	write_cr4(read_cr4() & ~(X86_CR4_PCIDE));
+	/* NOTE(review): clearing CR0.PG may #GP if this runs in 64-bit mode -- confirm */
+	write_cr0(read_cr0() & ~(X86_CR0_PG));
+
+	/* Load the 5-level root; ptl5[0] is populated by setup_page_table() */
+	write_cr3((ulong)&ptl5);
+	/* Set CR4.LA57. NOTE(review): CR0.PG is not set again here -- confirm intent */
+	write_cr4(read_cr4() | X86_CR4_LA57);
+}
+
 static void setup_gdt_tss(void)
 {
 	gdt_entry_t *tss_lo, *tss_hi;
@@ -268,6 +324,7 @@  void setup_efi(efi_bootinfo_t *efi_bootinfo)
 	smp_init();
 	phys_alloc_init(efi_bootinfo->free_mem_start, efi_bootinfo->free_mem_size);
 	setup_efi_rsdp(efi_bootinfo->rsdp);
+	setup_page_table();
 }
 
 #endif /* TARGET_EFI */
diff --git a/x86/efi/efistart64.S b/x86/efi/efistart64.S
index a14bd46..86c3760 100644
--- a/x86/efi/efistart64.S
+++ b/x86/efi/efistart64.S
@@ -22,6 +22,27 @@  ring0stacktop:
 
 .data
 
+.align PAGE_SIZE
+.globl ptl2
+ptl2:	/* level-2 tables: four pages, filled in by setup_page_table() */
+	. = . + 4 * PAGE_SIZE
+.align PAGE_SIZE

+.globl ptl3
+ptl3:	/* level-3 table: one page */
+	. = . + PAGE_SIZE
+.align PAGE_SIZE

+.globl ptl4
+ptl4:	/* level-4 table: one page; CR3 root for 4-level paging */
+	. = . + PAGE_SIZE
+.align PAGE_SIZE

+.globl ptl5
+ptl5:	/* level-5 table: one page; CR3 root for 5-level paging */
+	. = . + PAGE_SIZE
+.align PAGE_SIZE
+
 boot_idt:
 	.rept 256
 	.quad 0