@@ -31,6 +31,7 @@ typedef unsigned long pgd_t;
#define PT_ACCESSED_MASK (1ull << 5)
#define PT_DIRTY_MASK (1ull << 6)
#define PT_PAGE_SIZE_MASK (1ull << 7)
+#define PT_GLOBAL_MASK (1ull << 8) /* bit 8 of a page-table entry: the "global" flag */
#define PT64_NX_MASK (1ull << 63)
#define PT_ADDR_MASK GENMASK_ULL(51, 12)
@@ -4,7 +4,9 @@
#ifdef TARGET_EFI
#include "x86/acpi.h"
#include "x86/apic.h"
+#include "x86/processor.h"
#include "x86/smp.h"
+#include "asm/page.h"
#ifdef ALIGN
#undef ALIGN
@@ -26,6 +28,7 @@ typedef struct {
void setup_efi_bootinfo(efi_bootinfo_t *efi_bootinfo);
void setup_efi(efi_bootinfo_t *efi_bootinfo);
EFI_STATUS setup_efi_pre_boot(UINTN *mapkey, efi_bootinfo_t *efi_bootinfo);
+void setup_5level_page_table(void); /* Returns early if CR4.LA57 is already set; otherwise
+                                     * clears CR4.PCIDE and CR0.PG, loads ptl5 into CR3
+                                     * and sets CR4.LA57. */
#endif /* TARGET_EFI */
#endif /* _X86_ASM_SETUP_H_ */
@@ -217,6 +217,62 @@ EFI_STATUS setup_efi_pre_boot(UINTN *mapkey, efi_bootinfo_t *efi_bootinfo)
return EFI_SUCCESS;
}
+/* Defined in cstart64.S or efistart64.S
+ *
+ * Each symbol labels page-table storage rather than a single phys_addr_t
+ * value: the code in this file only ever takes the symbols' addresses
+ * (&ptlN). efistart64.S reserves PAGE_SIZE bytes each for ptl5, ptl4 and
+ * ptl3, and 4 * PAGE_SIZE bytes for ptl2, all aligned to PAGE_SIZE.
+ */
+extern phys_addr_t ptl5;
+extern phys_addr_t ptl4;
+extern phys_addr_t ptl3;
+extern phys_addr_t ptl2;
+
+/*
+ * Fill in the page tables reserved at ptl5/ptl4/ptl3/ptl2 and point CR3 at
+ * the 4-level root (ptl4).
+ *
+ * Layout produced:
+ *   ptl5[0]    -> ptl4
+ *   ptl4[0]    -> ptl3
+ *   ptl3[0..3] -> the four consecutive pages starting at ptl2
+ *   ptl2[i]    -> 2 MiB page at physical address i << 21, for i in [0, 2048)
+ *
+ * That is 4 * 512 entries of 2 MiB each, i.e. the first 4 GiB mapped at
+ * matching addresses. Every level carries PT_USER_MASK so that user-mode
+ * code may access these mappings. The ptl5 entry is only prepared here;
+ * this function loads ptl4, not ptl5.
+ */
+static void setup_page_table(void)
+{
+	pgd_t *curr_pt;
+	phys_addr_t flags;
+	int i;
+
+	/* Flags shared by every non-leaf entry */
+	flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
+
+	/* Level 5 */
+	curr_pt = (pgd_t *)&ptl5;
+	curr_pt[0] = ((phys_addr_t)&ptl4) | flags;
+	/* Level 4 */
+	curr_pt = (pgd_t *)&ptl4;
+	curr_pt[0] = ((phys_addr_t)&ptl3) | flags;
+	/* Level 3: one entry per page of the four-page ptl2 area */
+	curr_pt = (pgd_t *)&ptl3;
+	for (i = 0; i < 4; i++) {
+		curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags;
+	}
+	/* Level 2: leaf entries, marked as large pages via PT_PAGE_SIZE_MASK */
+	curr_pt = (pgd_t *)&ptl2;
+	flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK;
+	for (i = 0; i < 4 * 512; i++) {
+		/*
+		 * Widen before shifting. Evaluating (i << 21) in int overflows
+		 * once i reaches 1024, which is undefined behaviour and in
+		 * practice sign-extends garbage into the upper bits of the entry.
+		 */
+		curr_pt[i] = ((phys_addr_t)i << 21) | flags;
+	}
+
+	/* Load 4-level page table */
+	write_cr3((ulong)&ptl4);
+}
+
+void setup_5level_page_table(void)
+{
+ /* Check if 5-level page table is already enabled
+  * (CR4.LA57 set): if so there is nothing to switch, so return without
+  * touching CR0, CR3 or CR4. */
+ if (read_cr4() & X86_CR4_LA57) {
+ return;
+ }
+
+ /* Disable CR4.PCIDE */
+ write_cr4(read_cr4() & ~(X86_CR4_PCIDE));
+ /* Disable CR0.PG
+  * NOTE(review): nothing in this function sets CR0.PG again, and the x86
+  * manuals describe clearing CR0.PG while executing in 64-bit mode as
+  * raising #GP -- confirm which CPU mode callers run this in and where
+  * paging is turned back on. */
+ write_cr0(read_cr0() & ~(X86_CR0_PG));
+
+ /* Load new page table
+  * (ptl5 is the level-5 root; setup_page_table() points its entry 0 at
+  * ptl4). */
+ write_cr3((ulong)&ptl5);
+
+ /* Enable CR4.LA57 */
+ write_cr4(read_cr4() | X86_CR4_LA57);
+}
+
static void setup_gdt_tss(void)
{
gdt_entry_t *tss_lo, *tss_hi;
@@ -268,6 +324,7 @@ void setup_efi(efi_bootinfo_t *efi_bootinfo)
phys_alloc_init(efi_bootinfo->free_mem_start,
efi_bootinfo->free_mem_size);
setup_efi_rsdp(efi_bootinfo->rsdp);
+ setup_page_table();
}
#endif /* TARGET_EFI */
@@ -21,6 +21,27 @@ ring0stacktop:
.data
+.align PAGE_SIZE
+.globl ptl2
+ptl2: /* level-2 tables; setup_page_table() writes 4 * 512 entries here */
+ . = . + 4 * PAGE_SIZE /* advance the location counter to reserve four pages */
+.align PAGE_SIZE
+
+.globl ptl3
+ptl3: /* level-3 table; setup_page_table() writes entries 0-3, one per ptl2 page */
+ . = . + PAGE_SIZE
+.align PAGE_SIZE
+
+.globl ptl4
+ptl4: /* level-4 table; setup_page_table() points entry 0 at ptl3 */
+ . = . + PAGE_SIZE
+.align PAGE_SIZE
+
+.globl ptl5
+ptl5: /* level-5 table; setup_page_table() points entry 0 at ptl4 */
+ . = . + PAGE_SIZE
+.align PAGE_SIZE
+
boot_idt:
.rept 256
.quad 0
UEFI sets up page tables before executing EFI application binaries. These page tables do not allow user-space code to access kernel-space memory. However, the `x86/syscall.c` test case places a user-space function, `syscall_tf_user32`, inside kernel-space memory. When using the UEFI page tables, fetching this kernel memory from user space triggers a page fault (#PF), which this test case does not expect. KVM-Unit-Tests defines page tables that allow such accesses, so the solution to this problem is to load KVM-Unit-Tests' page tables: 1. Copy the page table definition from `x86/cstart64.S` 2. Update the page table entries with runtime memory addresses 3. Update the CR3 register with the new page table root address With this commit, `x86/syscall.c` can run under UEFI and generates the same output as under SeaBIOS, using the following command: ./x86/efi/run ./x86/syscall.efi --cpu Opteron_G1,vendor=AuthenticAMD Signed-off-by: Zixuan Wang <zixuanwang@google.com> --- lib/x86/asm/page.h | 1 + lib/x86/asm/setup.h | 3 +++ lib/x86/setup.c | 57 ++++++++++++++++++++++++++++++++++++++++++++ x86/efi/efistart64.S | 21 ++++++++++++++++ 4 files changed, 82 insertions(+)