Message ID | 1544049446-6359-3-git-send-email-liam.merwick@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | QEMU changes to do PVH boot | expand |
On Wed, Dec 05, 2018 at 10:37:25PM +0000, Liam Merwick wrote: > From: Liam Merwick <Liam.Merwick@oracle.com> > > Add support to read the PVH Entry address from an ELF note in the > uncompressed kernel binary (as defined by the x86/HVM direct boot ABI). > This 32-bit entry point will be used by QEMU to load the kernel in the > guest and jump into the kernel entry point. > > For now, a call to this function is added in pc_memory_init() to read the > address - a future patch will use the entry point. > > Signed-off-by: Liam Merwick <Liam.Merwick@oracle.com> > --- > hw/i386/pc.c | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- > include/elf.h | 10 +++ > 2 files changed, 281 insertions(+), 1 deletion(-) > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index f095725dbab2..056aa46d99b9 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -109,6 +109,9 @@ static struct e820_entry *e820_table; > static unsigned e820_entries; > struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; > > +/* Physical Address of PVH entry point read from kernel ELF NOTE */ > +static size_t pvh_start_addr; > + > void gsi_handler(void *opaque, int n, int level) > { > GSIState *s = opaque; > @@ -834,6 +837,267 @@ struct setup_data { > uint8_t data[0]; > } __attribute__((packed)); > > +/* > + * Search through the ELF Notes for an entry with the given > + * ELF Note type > + */ > +static void *get_elf_note_type(void *ehdr, void *phdr, bool elf_is64, > + size_t elf_note_type) Generic ELF code. Can you put it in hw/core/loader.c? > +{ > + void *nhdr = NULL; > + size_t nhdr_size = elf_is64 ? sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); > + size_t elf_note_entry_sz = 0; > + size_t phdr_off; > + size_t phdr_align; > + size_t phdr_memsz; > + size_t nhdr_namesz; > + size_t nhdr_descsz; > + size_t note_type; The macro tricks used by hw/core/loader.c are nasty, but I think they get the types right. Here the Elf64 on 32-bit host case is definitely broken due to using size_t. 
Perhaps 64-on-32 isn't supported, but getting the types right is worth discussing. > + > + phdr_off = elf_is64 ? > + ((Elf64_Phdr *)phdr)->p_offset : ((Elf32_Phdr *)phdr)->p_offset; > + phdr_align = elf_is64 ? > + ((Elf64_Phdr *)phdr)->p_align : ((Elf32_Phdr *)phdr)->p_align; > + phdr_memsz = elf_is64 ? > + ((Elf64_Phdr *)phdr)->p_memsz : ((Elf32_Phdr *)phdr)->p_memsz; > + > + nhdr = ehdr + phdr_off; The ELF file is untrusted. All inputs must be validated. phdr_off could be a bogus/malicious value. > + note_type = elf_is64 ? > + ((Elf64_Nhdr *)nhdr)->n_type : ((Elf32_Nhdr *)nhdr)->n_type; > + nhdr_namesz = elf_is64 ? > + ((Elf64_Nhdr *)nhdr)->n_namesz : ((Elf32_Nhdr *)nhdr)->n_namesz; > + nhdr_descsz = elf_is64 ? > + ((Elf64_Nhdr *)nhdr)->n_descsz : ((Elf32_Nhdr *)nhdr)->n_descsz; > + > + while (note_type != elf_note_type) { > + elf_note_entry_sz = nhdr_size + > + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + > + QEMU_ALIGN_UP(nhdr_descsz, phdr_align); > + > + /* > + * Verify that we haven't exceeded the end of the ELF Note section. > + * If we have, then there is no note of the given type present > + * in the ELF Notes. > + */ > + if (phdr_off + phdr_memsz < ((nhdr - ehdr) + elf_note_entry_sz)) { > + error_report("Note type (0x%lx) not found in ELF Note section", > + elf_note_type); > + return NULL; > + } > + > + /* skip to the next ELF Note entry */ > + nhdr += elf_note_entry_sz; > + note_type = elf_is64 ? > + ((Elf64_Nhdr *)nhdr)->n_type : ((Elf32_Nhdr *)nhdr)->n_type; > + nhdr_namesz = elf_is64 ? > + ((Elf64_Nhdr *)nhdr)->n_namesz : ((Elf32_Nhdr *)nhdr)->n_namesz; > + nhdr_descsz = elf_is64 ? > + ((Elf64_Nhdr *)nhdr)->n_descsz : ((Elf32_Nhdr *)nhdr)->n_descsz; > + } > + > + return nhdr; > +} > + > +/* > + * The entry point into the kernel for PVH boot is different from > + * the native entry point. The PVH entry is defined by the x86/HVM > + * direct boot ABI and is available in an ELFNOTE in the kernel binary. 
> + * This function reads the ELF headers of the binary specified on the > + * command line by -kernel (path contained in 'filename') and discovers > + * the PVH entry address from the appropriate ELF Note. > + * > + * The address of the PVH entry point is saved to the 'pvh_start_addr' > + * global variable. The ELF class of the binary is returned via 'elfclass' > + * (although the entry point is 32-bit, the kernel binary can be either > + * 32-bit or 64-bit). > + */ > +static bool read_pvh_start_addr_elf_note(const char *filename, > + unsigned char *elfclass) > +{ Can this be integrated into ELF loading? For example, could the elf loader take a function pointer to perform additional logic (e.g. extracting the PVH entry point)? That avoids reparsing the input file. > + void *ehdr = NULL; /* Cast to Elf64_Ehdr or Elf32_Ehdr */ > + void *phdr = NULL; /* Cast to Elf64_Phdr or Elf32_Phdr */ > + void *nhdr = NULL; /* Cast to Elf64_Nhdr or Elf32_Nhdr */ > + struct stat statbuf; > + size_t ehdr_size; > + size_t phdr_size; > + size_t nhdr_size; > + size_t elf_note_data_addr; > + /* Ehdr fields */ > + size_t ehdr_poff; > + /* Phdr fields */ > + size_t phdr_off; > + size_t phdr_align; > + size_t phdr_memsz; > + size_t phdr_type; > + /* Nhdr fields */ > + size_t nhdr_namesz; > + size_t nhdr_descsz; > + bool elf_is64; > + FILE *file; > + union { > + Elf32_Ehdr h32; > + Elf64_Ehdr h64; > + } elf_header; > + Error *err = NULL; > + > + pvh_start_addr = 0; > + > + if (filename == NULL) { > + return false; > + } > + > + file = fopen(filename, "rb"); > + if (file == NULL) { > + error_report("fopen(%s) failed", filename); > + return false; > + } > + > + if (fstat(fileno(file), &statbuf) < 0) { > + error_report("fstat() failed on file (%s)", filename); > + return false; > + } > + > + load_elf_hdr(filename, &elf_header, &elf_is64, &err); > + if (err) { > + error_free(err); > + fclose(file); > + return false; > + } > + > + *elfclass = elf_is64 ? 
> + elf_header.h64.e_ident[EI_CLASS] : elf_header.h32.e_ident[EI_CLASS]; > + if (*elfclass == ELFCLASSNONE) { > + error_report("kernel binary (%s) is ELFCLASSNONE", filename); > + fclose(file); > + return false; > + } > + > + ehdr_size = elf_is64 ? sizeof(Elf64_Ehdr) : sizeof(Elf32_Ehdr); > + phdr_size = elf_is64 ? sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr); > + nhdr_size = elf_is64 ? sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); > + > + /* We have already validated the ELF header when calling elf_load_hdr() */ > + > + ehdr = mmap(0, statbuf.st_size, > + PROT_READ | PROT_WRITE, MAP_PRIVATE, fileno(file), 0); > + if (ehdr == MAP_FAILED) { > + error_report("Failed to mmap kernel binary (%s)", filename); > + goto done; > + } > + > + /* > + * Search through the program execution header for the > + * ELF Note section. > + */ > + > + ehdr_poff = elf_is64 ? > + ((Elf64_Ehdr *)(ehdr))->e_phoff : ((Elf32_Ehdr *)(ehdr))->e_phoff; > + if (statbuf.st_size < (ehdr_size + ehdr_poff)) { > + error_report("ELF NOTE section exceeds file (%s) size", > + filename); > + goto done; > + } > + > + phdr = ehdr + ehdr_poff; > + phdr_type = elf_is64 ? > + ((Elf64_Phdr *)phdr)->p_type : ((Elf32_Phdr *)phdr)->p_type; > + while (phdr != NULL && phdr_type != PT_NOTE) { > + if (statbuf.st_size < ((phdr - ehdr) + phdr_size)) { > + error_report("ELF Program headers in file (%s) too short", > + filename); > + goto done; > + } > + phdr += phdr_size; > + phdr_type = elf_is64 ? > + ((Elf64_Phdr *)phdr)->p_type : ((Elf32_Phdr *)phdr)->p_type; > + } > + > + phdr_off = elf_is64 ? > + ((Elf64_Phdr *)phdr)->p_offset : ((Elf32_Phdr *)phdr)->p_offset; > + phdr_align = elf_is64 ? > + ((Elf64_Phdr *)phdr)->p_align : ((Elf32_Phdr *)phdr)->p_align; > + phdr_memsz = elf_is64 ? 
> + ((Elf64_Phdr *)phdr)->p_memsz : ((Elf32_Phdr *)phdr)->p_memsz; > + > + /* > + * check that the start of the ELF Note section is within the bounds > + * of the kernel ELF binary > + */ > + if (statbuf.st_size < (ehdr_poff + phdr_size + phdr_off)) { > + error_report("Start of ELF note section outside of file (%s) bounds", > + filename); > + goto done; > + } > + /* > + * check that the end of the ELF Note section is within the bounds > + * of the kernel ELF binary > + */ > + if (statbuf.st_size < (phdr_off + phdr_memsz)) { > + error_report("End of ELF note section outside of file (%s) bounds", > + filename); > + goto done; > + } > + > + /* > + * Search through the ELF Notes for an entry with the > + * Physical Address (PA) of the PVH entry point. > + */ > + nhdr = get_elf_note_type(ehdr, phdr, elf_is64, XEN_ELFNOTE_PHYS32_ENTRY); > + if (nhdr == NULL) { > + error_report("No PVH Entry details in kernel (%s) ELF Note section", > + filename); > + goto done; > + } > + > + /* > + * Verify that the returned ELF Note header doesn't exceed the > + * end of the kernel file > + */ > + if (statbuf.st_size < ((nhdr - ehdr))) { > + error_report("ELF Nhdr offset (0x%lx) exceeds file (%s) bounds (%ld)", > + (nhdr - ehdr), filename, statbuf.st_size); > + goto done; > + } > + > + nhdr_namesz = elf_is64 ? > + ((Elf64_Nhdr *)nhdr)->n_namesz : ((Elf32_Nhdr *)nhdr)->n_namesz; > + nhdr_descsz = elf_is64 ? 
> + ((Elf64_Nhdr *)nhdr)->n_descsz : ((Elf32_Nhdr *)nhdr)->n_descsz; > + > + /* > + * Verify that the ELF Note contents don't exceed the end of the > + * kernel file > + */ > + if (statbuf.st_size < ((nhdr - ehdr)) + nhdr_size + > + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + > + QEMU_ALIGN_UP(nhdr_descsz, phdr_align)) { > + error_report("ELF Nhdr contents (0x%lx) exceeds file bounds (%ld)", > + (nhdr - ehdr) + nhdr_size + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + > + QEMU_ALIGN_UP(nhdr_descsz, phdr_align), statbuf.st_size); > + goto done; > + } > + > + elf_note_data_addr = > + (size_t)nhdr + nhdr_size + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); > + > + pvh_start_addr = *(size_t *)elf_note_data_addr; > + > + /* > + * Verify that the PVH Entry point address does not exceed the > + * bounds of the kernel file. > + */ > + if (statbuf.st_size < pvh_start_addr) { > + error_report("PVH ELF note addr (0x%lx) exceeds file (%s) bounds (%ld)", > + (elf_note_data_addr - (size_t)ehdr), filename, statbuf.st_size); > + pvh_start_addr = 0; > + goto done; > + } > + > +done: > + (void) munmap(ehdr, statbuf.st_size); > + return pvh_start_addr != 0; > +} > + > static void load_linux(PCMachineState *pcms, > FWCfgState *fw_cfg) > { > @@ -1334,9 +1598,11 @@ void pc_memory_init(PCMachineState *pcms, > int linux_boot, i; > MemoryRegion *ram, *option_rom_mr; > MemoryRegion *ram_below_4g, *ram_above_4g; > - FWCfgState *fw_cfg; > + FWCfgState *fw_cfg = NULL; > + unsigned char class = ELFCLASSNONE; > MachineState *machine = MACHINE(pcms); > PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > + const char *kernel_filename = machine->kernel_filename; > > assert(machine->ram_size == pcms->below_4g_mem_size + > pcms->above_4g_mem_size); > @@ -1418,6 +1684,10 @@ void pc_memory_init(PCMachineState *pcms, > &machine->device_memory->mr); > } > > + if (linux_boot) { > + read_pvh_start_addr_elf_note(kernel_filename, &class); > + } > + > /* Initialize PC system firmware */ > 
pc_system_firmware_init(rom_memory, !pcmc->pci_enabled); > > diff --git a/include/elf.h b/include/elf.h > index c151164b63da..1f82c7a7124b 100644 > --- a/include/elf.h > +++ b/include/elf.h > @@ -1585,6 +1585,16 @@ typedef struct elf64_shdr { > #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ > #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ > > +/* > + * Physical entry point into the kernel. > + * > + * 32bit entry point into the kernel. When requested to launch the > + * guest kernel, use this entry point to launch the guest in 32-bit > + * protected mode with paging disabled. > + * > + * [ Corresponding definition in Linux kernel: include/xen/interface/elfnote.h ] > + */ > +#define XEN_ELFNOTE_PHYS32_ENTRY 18 /* 0x12 */ > > /* Note header in a PT_NOTE section */ > typedef struct elf32_note { > -- > 1.8.3.1 >
Thanks Stefan for the review - comments inline. On 11/12/2018 14:17, Stefan Hajnoczi wrote: > On Wed, Dec 05, 2018 at 10:37:25PM +0000, Liam Merwick wrote: >> From: Liam Merwick <Liam.Merwick@oracle.com> >> >> Add support to read the PVH Entry address from an ELF note in the >> uncompressed kernel binary (as defined by the x86/HVM direct boot ABI). >> This 32-bit entry point will be used by QEMU to load the kernel in the >> guest and jump into the kernel entry point. >> >> For now, a call to this function is added in pc_memory_init() to read the >> address - a future patch will use the entry point. >> >> Signed-off-by: Liam Merwick <Liam.Merwick@oracle.com> >> --- >> hw/i386/pc.c | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- >> include/elf.h | 10 +++ >> 2 files changed, 281 insertions(+), 1 deletion(-) >> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >> index f095725dbab2..056aa46d99b9 100644 >> --- a/hw/i386/pc.c >> +++ b/hw/i386/pc.c >> @@ -109,6 +109,9 @@ static struct e820_entry *e820_table; >> static unsigned e820_entries; >> struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; >> >> +/* Physical Address of PVH entry point read from kernel ELF NOTE */ >> +static size_t pvh_start_addr; >> + >> void gsi_handler(void *opaque, int n, int level) >> { >> GSIState *s = opaque; >> @@ -834,6 +837,267 @@ struct setup_data { >> uint8_t data[0]; >> } __attribute__((packed)); >> >> +/* >> + * Search through the ELF Notes for an entry with the given >> + * ELF Note type >> + */ >> +static void *get_elf_note_type(void *ehdr, void *phdr, bool elf_is64, >> + size_t elf_note_type) > > Generic ELF code. Can you put it in hw/core/loader.c? I've added a modified/slimmed down version to include/hw/elf_ops.h (which now handles 32 and 64 bit as you mention below). I've put this in a separate commit. > >> +{ >> + void *nhdr = NULL; >> + size_t nhdr_size = elf_is64 ? 
sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); >> + size_t elf_note_entry_sz = 0; >> + size_t phdr_off; >> + size_t phdr_align; >> + size_t phdr_memsz; >> + size_t nhdr_namesz; >> + size_t nhdr_descsz; >> + size_t note_type; > > The macro tricks used by hw/core/loader.c are nasty, but I think they > get the types right. Here the Elf64 on 32-bit host case is definitely > broken due to using size_t. Perhaps 64-on-32 isn't supported, but > getting the types right is worth discussing. > >> + >> + phdr_off = elf_is64 ? >> + ((Elf64_Phdr *)phdr)->p_offset : ((Elf32_Phdr *)phdr)->p_offset; >> + phdr_align = elf_is64 ? >> + ((Elf64_Phdr *)phdr)->p_align : ((Elf32_Phdr *)phdr)->p_align; >> + phdr_memsz = elf_is64 ? >> + ((Elf64_Phdr *)phdr)->p_memsz : ((Elf32_Phdr *)phdr)->p_memsz; >> + >> + nhdr = ehdr + phdr_off; > > The ELF file is untrusted. All inputs must be validated. phdr_off > could be an bogus/malicious value. Most of the parsing of the ELF binary goes away due to moving to parse during elf_load() - more info below. > >> + note_type = elf_is64 ? >> + ((Elf64_Nhdr *)nhdr)->n_type : ((Elf32_Nhdr *)nhdr)->n_type; >> + nhdr_namesz = elf_is64 ? >> + ((Elf64_Nhdr *)nhdr)->n_namesz : ((Elf32_Nhdr *)nhdr)->n_namesz; >> + nhdr_descsz = elf_is64 ? >> + ((Elf64_Nhdr *)nhdr)->n_descsz : ((Elf32_Nhdr *)nhdr)->n_descsz; >> + >> + while (note_type != elf_note_type) { >> + elf_note_entry_sz = nhdr_size + >> + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + >> + QEMU_ALIGN_UP(nhdr_descsz, phdr_align); >> + >> + /* >> + * Verify that we haven't exceeded the end of the ELF Note section. >> + * If we have, then there is no note of the given type present >> + * in the ELF Notes. >> + */ >> + if (phdr_off + phdr_memsz < ((nhdr - ehdr) + elf_note_entry_sz)) { >> + error_report("Note type (0x%lx) not found in ELF Note section", >> + elf_note_type); >> + return NULL; >> + } >> + >> + /* skip to the next ELF Note entry */ >> + nhdr += elf_note_entry_sz; >> + note_type = elf_is64 ? 
>> + ((Elf64_Nhdr *)nhdr)->n_type : ((Elf32_Nhdr *)nhdr)->n_type; >> + nhdr_namesz = elf_is64 ? >> + ((Elf64_Nhdr *)nhdr)->n_namesz : ((Elf32_Nhdr *)nhdr)->n_namesz; >> + nhdr_descsz = elf_is64 ? >> + ((Elf64_Nhdr *)nhdr)->n_descsz : ((Elf32_Nhdr *)nhdr)->n_descsz; >> + } >> + >> + return nhdr; >> +} >> + >> +/* >> + * The entry point into the kernel for PVH boot is different from >> + * the native entry point. The PVH entry is defined by the x86/HVM >> + * direct boot ABI and is available in an ELFNOTE in the kernel binary. >> + * This function reads the ELF headers of the binary specified on the >> + * command line by -kernel (path contained in 'filename') and discovers >> + * the PVH entry address from the appropriate ELF Note. >> + * >> + * The address of the PVH entry point is saved to the 'pvh_start_addr' >> + * global variable. The ELF class of the binary is returned via 'elfclass' >> + * (although the entry point is 32-bit, the kernel binary can be either >> + * 32-bit or 64-bit). >> + */ >> +static bool read_pvh_start_addr_elf_note(const char *filename, >> + unsigned char *elfclass) >> +{ > > Can this be integrated into ELF loading? For example, could the elf > loader take a function pointer to perform additional logic (e.g. > extracting the PVH entry point)? That avoids reparsing the input file. I have rewritten this considerably based on that suggestion. The reading of the PVH entry point is now done in a single pass during elf_load() - I added a commit that adds a new optional function pointer to parse the ELF note type (which is passed in via the existing translate_opaque arg - the function already had 11 args so I didn't want to add more than one new arg). Another commit adds a function to elf_ops.h to find an ELF note matching a specific type and then the 4th patch to do the PVH boot is for the most part the same - just minor load_elfboot() changes and the addition of a read_pvh_start_addr() helper function for load_elf() v2 will follow in a sec. 
Regards, Liam > >> + void *ehdr = NULL; /* Cast to Elf64_Ehdr or Elf32_Ehdr */ >> + void *phdr = NULL; /* Cast to Elf64_Phdr or Elf32_Phdr */ >> + void *nhdr = NULL; /* Cast to Elf64_Nhdr or Elf32_Nhdr */ >> + struct stat statbuf; >> + size_t ehdr_size; >> + size_t phdr_size; >> + size_t nhdr_size; >> + size_t elf_note_data_addr; >> + /* Ehdr fields */ >> + size_t ehdr_poff; >> + /* Phdr fields */ >> + size_t phdr_off; >> + size_t phdr_align; >> + size_t phdr_memsz; >> + size_t phdr_type; >> + /* Nhdr fields */ >> + size_t nhdr_namesz; >> + size_t nhdr_descsz; >> + bool elf_is64; >> + FILE *file; >> + union { >> + Elf32_Ehdr h32; >> + Elf64_Ehdr h64; >> + } elf_header; >> + Error *err = NULL; >> + >> + pvh_start_addr = 0; >> + >> + if (filename == NULL) { >> + return false; >> + } >> + >> + file = fopen(filename, "rb"); >> + if (file == NULL) { >> + error_report("fopen(%s) failed", filename); >> + return false; >> + } >> + >> + if (fstat(fileno(file), &statbuf) < 0) { >> + error_report("fstat() failed on file (%s)", filename); >> + return false; >> + } >> + >> + load_elf_hdr(filename, &elf_header, &elf_is64, &err); >> + if (err) { >> + error_free(err); >> + fclose(file); >> + return false; >> + } >> + >> + *elfclass = elf_is64 ? >> + elf_header.h64.e_ident[EI_CLASS] : elf_header.h32.e_ident[EI_CLASS]; >> + if (*elfclass == ELFCLASSNONE) { >> + error_report("kernel binary (%s) is ELFCLASSNONE", filename); >> + fclose(file); >> + return false; >> + } >> + >> + ehdr_size = elf_is64 ? sizeof(Elf64_Ehdr) : sizeof(Elf32_Ehdr); >> + phdr_size = elf_is64 ? sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr); >> + nhdr_size = elf_is64 ? 
sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); >> + >> + /* We have already validated the ELF header when calling elf_load_hdr() */ >> + >> + ehdr = mmap(0, statbuf.st_size, >> + PROT_READ | PROT_WRITE, MAP_PRIVATE, fileno(file), 0); >> + if (ehdr == MAP_FAILED) { >> + error_report("Failed to mmap kernel binary (%s)", filename); >> + goto done; >> + } >> + >> + /* >> + * Search through the program execution header for the >> + * ELF Note section. >> + */ >> + >> + ehdr_poff = elf_is64 ? >> + ((Elf64_Ehdr *)(ehdr))->e_phoff : ((Elf32_Ehdr *)(ehdr))->e_phoff; >> + if (statbuf.st_size < (ehdr_size + ehdr_poff)) { >> + error_report("ELF NOTE section exceeds file (%s) size", >> + filename); >> + goto done; >> + } >> + >> + phdr = ehdr + ehdr_poff; >> + phdr_type = elf_is64 ? >> + ((Elf64_Phdr *)phdr)->p_type : ((Elf32_Phdr *)phdr)->p_type; >> + while (phdr != NULL && phdr_type != PT_NOTE) { >> + if (statbuf.st_size < ((phdr - ehdr) + phdr_size)) { >> + error_report("ELF Program headers in file (%s) too short", >> + filename); >> + goto done; >> + } >> + phdr += phdr_size; >> + phdr_type = elf_is64 ? >> + ((Elf64_Phdr *)phdr)->p_type : ((Elf32_Phdr *)phdr)->p_type; >> + } >> + >> + phdr_off = elf_is64 ? >> + ((Elf64_Phdr *)phdr)->p_offset : ((Elf32_Phdr *)phdr)->p_offset; >> + phdr_align = elf_is64 ? >> + ((Elf64_Phdr *)phdr)->p_align : ((Elf32_Phdr *)phdr)->p_align; >> + phdr_memsz = elf_is64 ? 
>> + ((Elf64_Phdr *)phdr)->p_memsz : ((Elf32_Phdr *)phdr)->p_memsz; >> + >> + /* >> + * check that the start of the ELF Note section is within the bounds >> + * of the kernel ELF binary >> + */ >> + if (statbuf.st_size < (ehdr_poff + phdr_size + phdr_off)) { >> + error_report("Start of ELF note section outside of file (%s) bounds", >> + filename); >> + goto done; >> + } >> + /* >> + * check that the end of the ELF Note section is within the bounds >> + * of the kernel ELF binary >> + */ >> + if (statbuf.st_size < (phdr_off + phdr_memsz)) { >> + error_report("End of ELF note section outside of file (%s) bounds", >> + filename); >> + goto done; >> + } >> + >> + /* >> + * Search through the ELF Notes for an entry with the >> + * Physical Address (PA) of the PVH entry point. >> + */ >> + nhdr = get_elf_note_type(ehdr, phdr, elf_is64, XEN_ELFNOTE_PHYS32_ENTRY); >> + if (nhdr == NULL) { >> + error_report("No PVH Entry details in kernel (%s) ELF Note section", >> + filename); >> + goto done; >> + } >> + >> + /* >> + * Verify that the returned ELF Note header doesn't exceed the >> + * end of the kernel file >> + */ >> + if (statbuf.st_size < ((nhdr - ehdr))) { >> + error_report("ELF Nhdr offset (0x%lx) exceeds file (%s) bounds (%ld)", >> + (nhdr - ehdr), filename, statbuf.st_size); >> + goto done; >> + } >> + >> + nhdr_namesz = elf_is64 ? >> + ((Elf64_Nhdr *)nhdr)->n_namesz : ((Elf32_Nhdr *)nhdr)->n_namesz; >> + nhdr_descsz = elf_is64 ? 
>> + ((Elf64_Nhdr *)nhdr)->n_descsz : ((Elf32_Nhdr *)nhdr)->n_descsz; >> + >> + /* >> + * Verify that the ELF Note contents don't exceed the end of the >> + * kernel file >> + */ >> + if (statbuf.st_size < ((nhdr - ehdr)) + nhdr_size + >> + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + >> + QEMU_ALIGN_UP(nhdr_descsz, phdr_align)) { >> + error_report("ELF Nhdr contents (0x%lx) exceeds file bounds (%ld)", >> + (nhdr - ehdr) + nhdr_size + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + >> + QEMU_ALIGN_UP(nhdr_descsz, phdr_align), statbuf.st_size); >> + goto done; >> + } >> + >> + elf_note_data_addr = >> + (size_t)nhdr + nhdr_size + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); >> + >> + pvh_start_addr = *(size_t *)elf_note_data_addr; >> + >> + /* >> + * Verify that the PVH Entry point address does not exceed the >> + * bounds of the kernel file. >> + */ >> + if (statbuf.st_size < pvh_start_addr) { >> + error_report("PVH ELF note addr (0x%lx) exceeds file (%s) bounds (%ld)", >> + (elf_note_data_addr - (size_t)ehdr), filename, statbuf.st_size); >> + pvh_start_addr = 0; >> + goto done; >> + } >> + >> +done: >> + (void) munmap(ehdr, statbuf.st_size); >> + return pvh_start_addr != 0; >> +} >> + >> static void load_linux(PCMachineState *pcms, >> FWCfgState *fw_cfg) >> { >> @@ -1334,9 +1598,11 @@ void pc_memory_init(PCMachineState *pcms, >> int linux_boot, i; >> MemoryRegion *ram, *option_rom_mr; >> MemoryRegion *ram_below_4g, *ram_above_4g; >> - FWCfgState *fw_cfg; >> + FWCfgState *fw_cfg = NULL; >> + unsigned char class = ELFCLASSNONE; >> MachineState *machine = MACHINE(pcms); >> PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); >> + const char *kernel_filename = machine->kernel_filename; >> >> assert(machine->ram_size == pcms->below_4g_mem_size + >> pcms->above_4g_mem_size); >> @@ -1418,6 +1684,10 @@ void pc_memory_init(PCMachineState *pcms, >> &machine->device_memory->mr); >> } >> >> + if (linux_boot) { >> + read_pvh_start_addr_elf_note(kernel_filename, &class); >> + } >> + >> /* 
Initialize PC system firmware */ >> pc_system_firmware_init(rom_memory, !pcmc->pci_enabled); >> >> diff --git a/include/elf.h b/include/elf.h >> index c151164b63da..1f82c7a7124b 100644 >> --- a/include/elf.h >> +++ b/include/elf.h >> @@ -1585,6 +1585,16 @@ typedef struct elf64_shdr { >> #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ >> #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ >> >> +/* >> + * Physical entry point into the kernel. >> + * >> + * 32bit entry point into the kernel. When requested to launch the >> + * guest kernel, use this entry point to launch the guest in 32-bit >> + * protected mode with paging disabled. >> + * >> + * [ Corresponding definition in Linux kernel: include/xen/interface/elfnote.h ] >> + */ >> +#define XEN_ELFNOTE_PHYS32_ENTRY 18 /* 0x12 */ >> >> /* Note header in a PT_NOTE section */ >> typedef struct elf32_note { >> -- >> 1.8.3.1 >>
diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f095725dbab2..056aa46d99b9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -109,6 +109,9 @@ static struct e820_entry *e820_table; static unsigned e820_entries; struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; +/* Physical Address of PVH entry point read from kernel ELF NOTE */ +static size_t pvh_start_addr; + void gsi_handler(void *opaque, int n, int level) { GSIState *s = opaque; @@ -834,6 +837,267 @@ struct setup_data { uint8_t data[0]; } __attribute__((packed)); +/* + * Search through the ELF Notes for an entry with the given + * ELF Note type + */ +static void *get_elf_note_type(void *ehdr, void *phdr, bool elf_is64, + size_t elf_note_type) +{ + void *nhdr = NULL; + size_t nhdr_size = elf_is64 ? sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); + size_t elf_note_entry_sz = 0; + size_t phdr_off; + size_t phdr_align; + size_t phdr_memsz; + size_t nhdr_namesz; + size_t nhdr_descsz; + size_t note_type; + + phdr_off = elf_is64 ? + ((Elf64_Phdr *)phdr)->p_offset : ((Elf32_Phdr *)phdr)->p_offset; + phdr_align = elf_is64 ? + ((Elf64_Phdr *)phdr)->p_align : ((Elf32_Phdr *)phdr)->p_align; + phdr_memsz = elf_is64 ? + ((Elf64_Phdr *)phdr)->p_memsz : ((Elf32_Phdr *)phdr)->p_memsz; + + nhdr = ehdr + phdr_off; + note_type = elf_is64 ? + ((Elf64_Nhdr *)nhdr)->n_type : ((Elf32_Nhdr *)nhdr)->n_type; + nhdr_namesz = elf_is64 ? + ((Elf64_Nhdr *)nhdr)->n_namesz : ((Elf32_Nhdr *)nhdr)->n_namesz; + nhdr_descsz = elf_is64 ? + ((Elf64_Nhdr *)nhdr)->n_descsz : ((Elf32_Nhdr *)nhdr)->n_descsz; + + while (note_type != elf_note_type) { + elf_note_entry_sz = nhdr_size + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + + QEMU_ALIGN_UP(nhdr_descsz, phdr_align); + + /* + * Verify that we haven't exceeded the end of the ELF Note section. + * If we have, then there is no note of the given type present + * in the ELF Notes. 
+ */ + if (phdr_off + phdr_memsz < ((nhdr - ehdr) + elf_note_entry_sz)) { + error_report("Note type (0x%lx) not found in ELF Note section", + elf_note_type); + return NULL; + } + + /* skip to the next ELF Note entry */ + nhdr += elf_note_entry_sz; + note_type = elf_is64 ? + ((Elf64_Nhdr *)nhdr)->n_type : ((Elf32_Nhdr *)nhdr)->n_type; + nhdr_namesz = elf_is64 ? + ((Elf64_Nhdr *)nhdr)->n_namesz : ((Elf32_Nhdr *)nhdr)->n_namesz; + nhdr_descsz = elf_is64 ? + ((Elf64_Nhdr *)nhdr)->n_descsz : ((Elf32_Nhdr *)nhdr)->n_descsz; + } + + return nhdr; +} + +/* + * The entry point into the kernel for PVH boot is different from + * the native entry point. The PVH entry is defined by the x86/HVM + * direct boot ABI and is available in an ELFNOTE in the kernel binary. + * This function reads the ELF headers of the binary specified on the + * command line by -kernel (path contained in 'filename') and discovers + * the PVH entry address from the appropriate ELF Note. + * + * The address of the PVH entry point is saved to the 'pvh_start_addr' + * global variable. The ELF class of the binary is returned via 'elfclass' + * (although the entry point is 32-bit, the kernel binary can be either + * 32-bit or 64-bit). 
+ */ +static bool read_pvh_start_addr_elf_note(const char *filename, + unsigned char *elfclass) +{ + void *ehdr = NULL; /* Cast to Elf64_Ehdr or Elf32_Ehdr */ + void *phdr = NULL; /* Cast to Elf64_Phdr or Elf32_Phdr */ + void *nhdr = NULL; /* Cast to Elf64_Nhdr or Elf32_Nhdr */ + struct stat statbuf; + size_t ehdr_size; + size_t phdr_size; + size_t nhdr_size; + size_t elf_note_data_addr; + /* Ehdr fields */ + size_t ehdr_poff; + /* Phdr fields */ + size_t phdr_off; + size_t phdr_align; + size_t phdr_memsz; + size_t phdr_type; + /* Nhdr fields */ + size_t nhdr_namesz; + size_t nhdr_descsz; + bool elf_is64; + FILE *file; + union { + Elf32_Ehdr h32; + Elf64_Ehdr h64; + } elf_header; + Error *err = NULL; + + pvh_start_addr = 0; + + if (filename == NULL) { + return false; + } + + file = fopen(filename, "rb"); + if (file == NULL) { + error_report("fopen(%s) failed", filename); + return false; + } + + if (fstat(fileno(file), &statbuf) < 0) { + error_report("fstat() failed on file (%s)", filename); + return false; + } + + load_elf_hdr(filename, &elf_header, &elf_is64, &err); + if (err) { + error_free(err); + fclose(file); + return false; + } + + *elfclass = elf_is64 ? + elf_header.h64.e_ident[EI_CLASS] : elf_header.h32.e_ident[EI_CLASS]; + if (*elfclass == ELFCLASSNONE) { + error_report("kernel binary (%s) is ELFCLASSNONE", filename); + fclose(file); + return false; + } + + ehdr_size = elf_is64 ? sizeof(Elf64_Ehdr) : sizeof(Elf32_Ehdr); + phdr_size = elf_is64 ? sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr); + nhdr_size = elf_is64 ? sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); + + /* We have already validated the ELF header when calling elf_load_hdr() */ + + ehdr = mmap(0, statbuf.st_size, + PROT_READ | PROT_WRITE, MAP_PRIVATE, fileno(file), 0); + if (ehdr == MAP_FAILED) { + error_report("Failed to mmap kernel binary (%s)", filename); + goto done; + } + + /* + * Search through the program execution header for the + * ELF Note section. + */ + + ehdr_poff = elf_is64 ? 
+ ((Elf64_Ehdr *)(ehdr))->e_phoff : ((Elf32_Ehdr *)(ehdr))->e_phoff; + if (statbuf.st_size < (ehdr_size + ehdr_poff)) { + error_report("ELF NOTE section exceeds file (%s) size", + filename); + goto done; + } + + phdr = ehdr + ehdr_poff; + phdr_type = elf_is64 ? + ((Elf64_Phdr *)phdr)->p_type : ((Elf32_Phdr *)phdr)->p_type; + while (phdr != NULL && phdr_type != PT_NOTE) { + if (statbuf.st_size < ((phdr - ehdr) + phdr_size)) { + error_report("ELF Program headers in file (%s) too short", + filename); + goto done; + } + phdr += phdr_size; + phdr_type = elf_is64 ? + ((Elf64_Phdr *)phdr)->p_type : ((Elf32_Phdr *)phdr)->p_type; + } + + phdr_off = elf_is64 ? + ((Elf64_Phdr *)phdr)->p_offset : ((Elf32_Phdr *)phdr)->p_offset; + phdr_align = elf_is64 ? + ((Elf64_Phdr *)phdr)->p_align : ((Elf32_Phdr *)phdr)->p_align; + phdr_memsz = elf_is64 ? + ((Elf64_Phdr *)phdr)->p_memsz : ((Elf32_Phdr *)phdr)->p_memsz; + + /* + * check that the start of the ELF Note section is within the bounds + * of the kernel ELF binary + */ + if (statbuf.st_size < (ehdr_poff + phdr_size + phdr_off)) { + error_report("Start of ELF note section outside of file (%s) bounds", + filename); + goto done; + } + /* + * check that the end of the ELF Note section is within the bounds + * of the kernel ELF binary + */ + if (statbuf.st_size < (phdr_off + phdr_memsz)) { + error_report("End of ELF note section outside of file (%s) bounds", + filename); + goto done; + } + + /* + * Search through the ELF Notes for an entry with the + * Physical Address (PA) of the PVH entry point. 
+ */ + nhdr = get_elf_note_type(ehdr, phdr, elf_is64, XEN_ELFNOTE_PHYS32_ENTRY); + if (nhdr == NULL) { + error_report("No PVH Entry details in kernel (%s) ELF Note section", + filename); + goto done; + } + + /* + * Verify that the returned ELF Note header doesn't exceed the + * end of the kernel file + */ + if (statbuf.st_size < ((nhdr - ehdr))) { + error_report("ELF Nhdr offset (0x%lx) exceeds file (%s) bounds (%ld)", + (nhdr - ehdr), filename, statbuf.st_size); + goto done; + } + + nhdr_namesz = elf_is64 ? + ((Elf64_Nhdr *)nhdr)->n_namesz : ((Elf32_Nhdr *)nhdr)->n_namesz; + nhdr_descsz = elf_is64 ? + ((Elf64_Nhdr *)nhdr)->n_descsz : ((Elf32_Nhdr *)nhdr)->n_descsz; + + /* + * Verify that the ELF Note contents don't exceed the end of the + * kernel file + */ + if (statbuf.st_size < ((nhdr - ehdr)) + nhdr_size + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + + QEMU_ALIGN_UP(nhdr_descsz, phdr_align)) { + error_report("ELF Nhdr contents (0x%lx) exceeds file bounds (%ld)", + (nhdr - ehdr) + nhdr_size + QEMU_ALIGN_UP(nhdr_namesz, phdr_align) + + QEMU_ALIGN_UP(nhdr_descsz, phdr_align), statbuf.st_size); + goto done; + } + + elf_note_data_addr = + (size_t)nhdr + nhdr_size + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); + + pvh_start_addr = *(size_t *)elf_note_data_addr; + + /* + * Verify that the PVH Entry point address does not exceed the + * bounds of the kernel file. 
+ */ + if (statbuf.st_size < pvh_start_addr) { + error_report("PVH ELF note addr (0x%lx) exceeds file (%s) bounds (%ld)", + (elf_note_data_addr - (size_t)ehdr), filename, statbuf.st_size); + pvh_start_addr = 0; + goto done; + } + +done: + (void) munmap(ehdr, statbuf.st_size); + return pvh_start_addr != 0; +} + static void load_linux(PCMachineState *pcms, FWCfgState *fw_cfg) { @@ -1334,9 +1598,11 @@ void pc_memory_init(PCMachineState *pcms, int linux_boot, i; MemoryRegion *ram, *option_rom_mr; MemoryRegion *ram_below_4g, *ram_above_4g; - FWCfgState *fw_cfg; + FWCfgState *fw_cfg = NULL; + unsigned char class = ELFCLASSNONE; MachineState *machine = MACHINE(pcms); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + const char *kernel_filename = machine->kernel_filename; assert(machine->ram_size == pcms->below_4g_mem_size + pcms->above_4g_mem_size); @@ -1418,6 +1684,10 @@ void pc_memory_init(PCMachineState *pcms, &machine->device_memory->mr); } + if (linux_boot) { + read_pvh_start_addr_elf_note(kernel_filename, &class); + } + /* Initialize PC system firmware */ pc_system_firmware_init(rom_memory, !pcmc->pci_enabled); diff --git a/include/elf.h b/include/elf.h index c151164b63da..1f82c7a7124b 100644 --- a/include/elf.h +++ b/include/elf.h @@ -1585,6 +1585,16 @@ typedef struct elf64_shdr { #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +/* + * Physical entry point into the kernel. + * + * 32bit entry point into the kernel. When requested to launch the + * guest kernel, use this entry point to launch the guest in 32-bit + * protected mode with paging disabled. + * + * [ Corresponding definition in Linux kernel: include/xen/interface/elfnote.h ] + */ +#define XEN_ELFNOTE_PHYS32_ENTRY 18 /* 0x12 */ /* Note header in a PT_NOTE section */ typedef struct elf32_note {