Message ID | 20220623080208.2214-1-jgross@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | xen: consider alloc-only segments when loading PV dom0 kernel | expand |
On 23.06.2022 10:02, Juergen Gross wrote: > When loading the dom0 kernel for PV mode, the first free usable memory > location after the kernel needs to take segments into account, which > have only the ALLOC flag set, but are not specified to be loaded in > the program headers of the ELF file. > > This is e.g. a problem for Linux kernels from 5.19 onwards, as those > can have a final NOLOAD section at the end, which must not be used by > e.g. the start_info structure or the initial page tables allocated by > the hypervisor. > > Signed-off-by: Juergen Gross <jgross@suse.com> > --- > xen/common/libelf/libelf-loader.c | 33 +++++++++++++++++++++++++++++++ > 1 file changed, 33 insertions(+) > > diff --git a/xen/common/libelf/libelf-loader.c b/xen/common/libelf/libelf-loader.c > index 629cc0d3e6..4b0e3ced55 100644 > --- a/xen/common/libelf/libelf-loader.c > +++ b/xen/common/libelf/libelf-loader.c > @@ -467,7 +467,9 @@ do { \ > void elf_parse_binary(struct elf_binary *elf) > { > ELF_HANDLE_DECL(elf_phdr) phdr; > + ELF_HANDLE_DECL(elf_shdr) shdr; > uint64_t low = -1, high = 0, paddr, memsz; > + uint64_t vlow = -1, vhigh = 0, vaddr, voff; > unsigned i, count; > > count = elf_phdr_count(elf); > @@ -480,6 +482,7 @@ void elf_parse_binary(struct elf_binary *elf) > if ( !elf_phdr_is_loadable(elf, phdr) ) > continue; > paddr = elf_uval(elf, phdr, p_paddr); > + vaddr = elf_uval(elf, phdr, p_vaddr); > memsz = elf_uval(elf, phdr, p_memsz); > elf_msg(elf, "ELF: phdr: paddr=%#" PRIx64 " memsz=%#" PRIx64 "\n", > paddr, memsz); > @@ -487,7 +490,37 @@ void elf_parse_binary(struct elf_binary *elf) > low = paddr; > if ( high < paddr + memsz ) > high = paddr + memsz; > + if ( vlow > vaddr ) > + vlow = vaddr; > + if ( vhigh < vaddr + memsz ) > + vhigh = vaddr + memsz; > } > + > + voff = vhigh - high; > + > + count = elf_shdr_count(elf); > + for ( i = 0; i < count; i++ ) > + { > + shdr = elf_shdr_by_index(elf, i); > + if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) > + /* input has an 
insane section header count field */ > + break; > + if ( !(elf_uval(elf, shdr, sh_flags) & SHF_ALLOC) ) > + continue; > + vaddr = elf_uval(elf, shdr, sh_addr); > + memsz = elf_uval(elf, shdr, sh_size); > + if ( vlow > vaddr ) > + { > + vlow = vaddr; > + low = vaddr - voff; > + } > + if ( vhigh < vaddr + memsz ) > + { > + vhigh = vaddr + memsz; > + high = vaddr + memsz - voff; > + } > + } As said in the reply to your problem report: The set of PHDRs doesn't cover all sections. For loading one should never need to resort to parsing section headers - in a loadable binary it is no error if there's no section table in the first place. (The title is also misleading, as you really mean sections there, not segments. Afaik there's no concept of "alloc" for segments, which are what program headers describe.) Also: Needing to fix this in the hypervisor would mean that Linux 5.19 and onwards cannot be booted on Xen without whichever fix backported. Finally, you changing libelf but referring to only Dom0 in the title looks inconsistent to me. Jan
On 23.06.22 11:04, Jan Beulich wrote: > On 23.06.2022 10:02, Juergen Gross wrote: >> When loading the dom0 kernel for PV mode, the first free usable memory >> location after the kernel needs to take segments into account, which >> have only the ALLOC flag set, but are not specified to be loaded in >> the program headers of the ELF file. >> >> This is e.g. a problem for Linux kernels from 5.19 onwards, as those >> can have a final NOLOAD section at the end, which must not be used by >> e.g. the start_info structure or the initial page tables allocated by >> the hypervisor. >> >> Signed-off-by: Juergen Gross <jgross@suse.com> >> --- >> xen/common/libelf/libelf-loader.c | 33 +++++++++++++++++++++++++++++++ >> 1 file changed, 33 insertions(+) >> >> diff --git a/xen/common/libelf/libelf-loader.c b/xen/common/libelf/libelf-loader.c >> index 629cc0d3e6..4b0e3ced55 100644 >> --- a/xen/common/libelf/libelf-loader.c >> +++ b/xen/common/libelf/libelf-loader.c >> @@ -467,7 +467,9 @@ do { \ >> void elf_parse_binary(struct elf_binary *elf) >> { >> ELF_HANDLE_DECL(elf_phdr) phdr; >> + ELF_HANDLE_DECL(elf_shdr) shdr; >> uint64_t low = -1, high = 0, paddr, memsz; >> + uint64_t vlow = -1, vhigh = 0, vaddr, voff; >> unsigned i, count; >> >> count = elf_phdr_count(elf); >> @@ -480,6 +482,7 @@ void elf_parse_binary(struct elf_binary *elf) >> if ( !elf_phdr_is_loadable(elf, phdr) ) >> continue; >> paddr = elf_uval(elf, phdr, p_paddr); >> + vaddr = elf_uval(elf, phdr, p_vaddr); >> memsz = elf_uval(elf, phdr, p_memsz); >> elf_msg(elf, "ELF: phdr: paddr=%#" PRIx64 " memsz=%#" PRIx64 "\n", >> paddr, memsz); >> @@ -487,7 +490,37 @@ void elf_parse_binary(struct elf_binary *elf) >> low = paddr; >> if ( high < paddr + memsz ) >> high = paddr + memsz; >> + if ( vlow > vaddr ) >> + vlow = vaddr; >> + if ( vhigh < vaddr + memsz ) >> + vhigh = vaddr + memsz; >> } >> + >> + voff = vhigh - high; >> + >> + count = elf_shdr_count(elf); >> + for ( i = 0; i < count; i++ ) >> + { >> + shdr = 
elf_shdr_by_index(elf, i); >> + if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) >> + /* input has an insane section header count field */ >> + break; >> + if ( !(elf_uval(elf, shdr, sh_flags) & SHF_ALLOC) ) >> + continue; >> + vaddr = elf_uval(elf, shdr, sh_addr); >> + memsz = elf_uval(elf, shdr, sh_size); >> + if ( vlow > vaddr ) >> + { >> + vlow = vaddr; >> + low = vaddr - voff; >> + } >> + if ( vhigh < vaddr + memsz ) >> + { >> + vhigh = vaddr + memsz; >> + high = vaddr + memsz - voff; >> + } >> + } > > As said in the reply to your problem report: The set of PHDRs doesn't > cover all sections. For loading one should never need to resort to > parsing section headers - in a loadable binary it is no error if > there's no section table in the first place. (The title is also The problem isn't the loading, but the memory usage after doing the loading. The hypervisor is placing page tables in a memory region the kernel has other plans with. > misleading, as you really mean sections there, not segments. Afaik > there's no concept of "alloc" for segments, which are what program > headers describe.) Sorry, will reword. > Also: Needing to fix this in the hypervisor would mean that Linux > 5.19 and onwards cannot be booted on Xen without whichever fix > backported. Correct. See my reply to the reply you mentioned above. > Finally, you changing libelf but referring to only Dom0 in the title > looks inconsistent to me. Hmm, yes. Will drop the dom0 aspect. Juergen
On 23.06.2022 11:08, Juergen Gross wrote: > On 23.06.22 11:04, Jan Beulich wrote: >> On 23.06.2022 10:02, Juergen Gross wrote: >>> When loading the dom0 kernel for PV mode, the first free usable memory >>> location after the kernel needs to take segments into account, which >>> have only the ALLOC flag set, but are not specified to be loaded in >>> the program headers of the ELF file. >>> >>> This is e.g. a problem for Linux kernels from 5.19 onwards, as those >>> can have a final NOLOAD section at the end, which must not be used by >>> e.g. the start_info structure or the initial page tables allocated by >>> the hypervisor. >>> >>> Signed-off-by: Juergen Gross <jgross@suse.com> >>> --- >>> xen/common/libelf/libelf-loader.c | 33 +++++++++++++++++++++++++++++++ >>> 1 file changed, 33 insertions(+) >>> >>> diff --git a/xen/common/libelf/libelf-loader.c b/xen/common/libelf/libelf-loader.c >>> index 629cc0d3e6..4b0e3ced55 100644 >>> --- a/xen/common/libelf/libelf-loader.c >>> +++ b/xen/common/libelf/libelf-loader.c >>> @@ -467,7 +467,9 @@ do { \ >>> void elf_parse_binary(struct elf_binary *elf) >>> { >>> ELF_HANDLE_DECL(elf_phdr) phdr; >>> + ELF_HANDLE_DECL(elf_shdr) shdr; >>> uint64_t low = -1, high = 0, paddr, memsz; >>> + uint64_t vlow = -1, vhigh = 0, vaddr, voff; >>> unsigned i, count; >>> >>> count = elf_phdr_count(elf); >>> @@ -480,6 +482,7 @@ void elf_parse_binary(struct elf_binary *elf) >>> if ( !elf_phdr_is_loadable(elf, phdr) ) >>> continue; >>> paddr = elf_uval(elf, phdr, p_paddr); >>> + vaddr = elf_uval(elf, phdr, p_vaddr); >>> memsz = elf_uval(elf, phdr, p_memsz); >>> elf_msg(elf, "ELF: phdr: paddr=%#" PRIx64 " memsz=%#" PRIx64 "\n", >>> paddr, memsz); >>> @@ -487,7 +490,37 @@ void elf_parse_binary(struct elf_binary *elf) >>> low = paddr; >>> if ( high < paddr + memsz ) >>> high = paddr + memsz; >>> + if ( vlow > vaddr ) >>> + vlow = vaddr; >>> + if ( vhigh < vaddr + memsz ) >>> + vhigh = vaddr + memsz; >>> } >>> + >>> + voff = vhigh - high; >>> + >>> + 
count = elf_shdr_count(elf); >>> + for ( i = 0; i < count; i++ ) >>> + { >>> + shdr = elf_shdr_by_index(elf, i); >>> + if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) >>> + /* input has an insane section header count field */ >>> + break; >>> + if ( !(elf_uval(elf, shdr, sh_flags) & SHF_ALLOC) ) >>> + continue; >>> + vaddr = elf_uval(elf, shdr, sh_addr); >>> + memsz = elf_uval(elf, shdr, sh_size); >>> + if ( vlow > vaddr ) >>> + { >>> + vlow = vaddr; >>> + low = vaddr - voff; >>> + } >>> + if ( vhigh < vaddr + memsz ) >>> + { >>> + vhigh = vaddr + memsz; >>> + high = vaddr + memsz - voff; >>> + } >>> + } >> >> As said in the reply to your problem report: The set of PHDRs doesn't >> cover all sections. For loading one should never need to resort to >> parsing section headers - in a loadable binary it is no error if >> there's no section table in the first place. (The title is also > > The problem isn't the loading, but the memory usage after doing the > loading. The hypervisor is placing page tables in a memory region > the kernel has other plans with. But part of "loading" is to determine the extent of the binary, which is what the program headers (and only them) ought to describe. Note also that our "loading" includes correct handling of .bss-style parts of segments (i.e. their clearing): static elf_errorstatus elf_load_image(struct elf_binary *elf, elf_ptrval dst, elf_ptrval src, uint64_t filesz, uint64_t memsz) { elf_errorstatus rc; if ( filesz > ULONG_MAX || memsz > ULONG_MAX ) return -1; /* We trust the dom0 kernel image completely, so we don't care * about overruns etc. here. */ rc = elf_memcpy(elf->vcpu, ELF_UNSAFE_PTR(dst), ELF_UNSAFE_PTR(src), filesz); if ( rc != 0 ) return -1; rc = elf_memcpy(elf->vcpu, ELF_UNSAFE_PTR(dst + filesz), NULL, memsz - filesz); if ( rc != 0 ) return -1; return 0; } IOW in principle there's no need for the kernel to clear its .bss (a 2nd time). Provided, of course, the phdrs properly describe the entire image. Jan
diff --git a/xen/common/libelf/libelf-loader.c b/xen/common/libelf/libelf-loader.c index 629cc0d3e6..4b0e3ced55 100644 --- a/xen/common/libelf/libelf-loader.c +++ b/xen/common/libelf/libelf-loader.c @@ -467,7 +467,9 @@ do { \ void elf_parse_binary(struct elf_binary *elf) { ELF_HANDLE_DECL(elf_phdr) phdr; + ELF_HANDLE_DECL(elf_shdr) shdr; uint64_t low = -1, high = 0, paddr, memsz; + uint64_t vlow = -1, vhigh = 0, vaddr, voff; unsigned i, count; count = elf_phdr_count(elf); @@ -480,6 +482,7 @@ void elf_parse_binary(struct elf_binary *elf) if ( !elf_phdr_is_loadable(elf, phdr) ) continue; paddr = elf_uval(elf, phdr, p_paddr); + vaddr = elf_uval(elf, phdr, p_vaddr); memsz = elf_uval(elf, phdr, p_memsz); elf_msg(elf, "ELF: phdr: paddr=%#" PRIx64 " memsz=%#" PRIx64 "\n", paddr, memsz); @@ -487,7 +490,37 @@ void elf_parse_binary(struct elf_binary *elf) low = paddr; if ( high < paddr + memsz ) high = paddr + memsz; + if ( vlow > vaddr ) + vlow = vaddr; + if ( vhigh < vaddr + memsz ) + vhigh = vaddr + memsz; } + + voff = vhigh - high; + + count = elf_shdr_count(elf); + for ( i = 0; i < count; i++ ) + { + shdr = elf_shdr_by_index(elf, i); + if ( !elf_access_ok(elf, ELF_HANDLE_PTRVAL(shdr), 1) ) + /* input has an insane section header count field */ + break; + if ( !(elf_uval(elf, shdr, sh_flags) & SHF_ALLOC) ) + continue; + vaddr = elf_uval(elf, shdr, sh_addr); + memsz = elf_uval(elf, shdr, sh_size); + if ( vlow > vaddr ) + { + vlow = vaddr; + low = vaddr - voff; + } + if ( vhigh < vaddr + memsz ) + { + vhigh = vaddr + memsz; + high = vaddr + memsz - voff; + } + } + elf->pstart = low; elf->pend = high; elf_msg(elf, "ELF: memory: %#" PRIx64 " -> %#" PRIx64 "\n",
When loading the dom0 kernel for PV mode, the first free usable memory location after the kernel needs to take into account sections which have only the ALLOC flag set, but which are not covered by any loadable program header of the ELF file.

This is e.g. a problem for Linux kernels from 5.19 onwards, as those can have a final NOLOAD section at the end, whose memory must not be used for e.g. the start_info structure or the initial page tables allocated by the hypervisor.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
xen/common/libelf/libelf-loader.c | 33 +++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)