diff mbox series

[RFC,52/73] x86/boot: Allow to do relocation for uncompressed kernel

Message ID 20240226143630.33643-53-jiangshanlai@gmail.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/PVM: Introduce a new hypervisor | expand

Commit Message

Lai Jiangshan Feb. 26, 2024, 2:36 p.m. UTC
From: Hou Wenlong <houwenlong.hwl@antgroup.com>

Relocation is currently only performed during the uncompression process.
However, in some situations, such as with security containers, the
uncompressed kernel can be booted directly. Therefore, it is useful to
allow for relocation of the uncompressed kernel. Taking inspiration from
the implementation in MIPS, a new section named ".data.reloc" is
reserved for relocations. The relocs tool can then append the
relocations into this section. Additionally, a helper function is
introduced to perform relocations during booting, similar to the
relocations in the bootloader. For PVH entry, relocation for the
pre-constructed page table should not be performed; otherwise, booting
will fail.

Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
 arch/x86/Kconfig                  | 20 +++++++++
 arch/x86/Makefile.postlink        |  9 +++-
 arch/x86/kernel/head64_identity.c | 70 +++++++++++++++++++++++++++++++
 arch/x86/kernel/vmlinux.lds.S     | 18 ++++++++
 4 files changed, 116 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a53b65499951..d02ef3bdb171 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2183,6 +2183,26 @@  config RELOCATABLE
 	  it has been loaded at and the compile time physical address
 	  (CONFIG_PHYSICAL_START) is used as the minimum location.
 
+config RELOCATABLE_UNCOMPRESSED_KERNEL
+	bool
+	depends on RELOCATABLE
+	help
+	  A table of relocation data will be appended to the uncompressed
+	  kernel binary and parsed at boot to fix up the relocated kernel.
+
+config RELOCATION_TABLE_SIZE
+	hex "Relocation table size"
+	depends on RELOCATABLE_UNCOMPRESSED_KERNEL
+	range 0x0 0x01000000
+	default "0x00200000"
+	help
+	  This option allows the amount of space reserved for the table to be
+	  adjusted, although the default of 2MB should be ok in most cases.
+
+	  The build will fail and a valid size suggested if this is too small.
+
+	  If unsure, leave at the default value.
+
 config X86_PIE
 	bool "Build a PIE kernel"
 	default n
diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink
index fef2e977cc7d..c115692b67b2 100644
--- a/arch/x86/Makefile.postlink
+++ b/arch/x86/Makefile.postlink
@@ -4,7 +4,8 @@ 
 # ===========================================================================
 #
 # 1. Separate relocations from vmlinux into vmlinux.relocs.
-# 2. Strip relocations from vmlinux.
+# 2. Insert the relocation table into vmlinux.
+# 3. Strip relocations from vmlinux.
 
 PHONY := __archpost
 __archpost:
@@ -20,6 +21,9 @@  quiet_cmd_relocs = RELOCS  $(OUT_RELOCS)/$@.relocs
 	$(CMD_RELOCS) $@ > $(OUT_RELOCS)/$@.relocs; \
 	$(CMD_RELOCS) --abs-relocs $@
 
+quiet_cmd_insert_relocs = RELOCS  $@
+      cmd_insert_relocs = $(CMD_RELOCS) --keep $@
+
 quiet_cmd_strip_relocs = RSTRIP  $@
       cmd_strip_relocs = \
 	$(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \
@@ -29,6 +33,9 @@  quiet_cmd_strip_relocs = RSTRIP  $@
 
 vmlinux: FORCE
 	@true
+ifeq ($(CONFIG_RELOCATABLE_UNCOMPRESSED_KERNEL),y)
+	$(call cmd,insert_relocs)
+endif
 ifeq ($(CONFIG_X86_NEED_RELOCS),y)
 	$(call cmd,relocs)
 	$(call cmd,strip_relocs)
diff --git a/arch/x86/kernel/head64_identity.c b/arch/x86/kernel/head64_identity.c
index ecac6e704868..4548ad615ecf 100644
--- a/arch/x86/kernel/head64_identity.c
+++ b/arch/x86/kernel/head64_identity.c
@@ -315,3 +315,73 @@  void __head startup_64_setup_env(void)
 
 	startup_64_load_idt();
 }
+
+#ifdef CONFIG_RELOCATABLE_UNCOMPRESSED_KERNEL
+extern u8 __relocation_end[];
+
+/*
+ * Tell whether @ptr falls inside the page occupied by init_top_pgt or the
+ * page occupied by level3_ident_pgt. The ranges are only checked when
+ * CONFIG_PVH is enabled; without it the result is always false.
+ */
+static bool __head is_in_pvh_pgtable(unsigned long ptr)
+{
+#ifdef CONFIG_PVH
+	const unsigned long top_pgt = (unsigned long)init_top_pgt;
+	const unsigned long ident_pgt = (unsigned long)level3_ident_pgt;
+
+	/* Each table is treated as the half-open range [base, base + PAGE_SIZE). */
+	if (top_pgt <= ptr && ptr < top_pgt + PAGE_SIZE)
+		return true;
+	if (ident_pgt <= ptr && ptr < ident_pgt + PAGE_SIZE)
+		return true;
+#endif
+	return false;
+}
+
+/*
+ * Walk the relocation table that ends at __relocation_end backwards and
+ * patch every location it names.
+ *
+ * @physbase: base used to form the offset (physbase - __START_KERNEL) that
+ *            is added to each table entry to find the word to patch.
+ * @virtbase: base used to form the fix-up (virtbase - __START_KERNEL_map)
+ *            that is added to, or subtracted from, each patched word.
+ */
+void __head __relocate_kernel(unsigned long physbase, unsigned long virtbase)
+{
+	unsigned long fixup = virtbase - __START_KERNEL_map;
+	unsigned long offset = physbase - __START_KERNEL;
+	int *entry = (int *)__relocation_end;
+	unsigned long target;
+
+	/*
+	 * Relocation has already happened in the bootloader,
+	 * don't do it again.
+	 */
+	if (SYM_ABS_VA(_text) != __START_KERNEL)
+		return;
+
+	/* A zero fix-up would leave every patched word unchanged. */
+	if (fixup == 0)
+		return;
+
+	/*
+	 * Table layout, from lower to higher addresses:
+	 *
+	 * kernel bits...
+	 * 0 - zero terminator for 64 bit relocations
+	 * 64 bit relocation repeated
+	 * 0 - zero terminator for inverse 32 bit relocations
+	 * 32 bit inverse relocation repeated
+	 * 0 - zero terminator for 32 bit relocations
+	 * 32 bit relocation repeated
+	 *
+	 * The table is consumed backwards from the end of the .data.reloc
+	 * section, so the three groups are visited in reverse order; see
+	 * handle_relocations() in arch/x86/boot/compressed/misc.c.
+	 */
+
+	/* 32 bit relocations: add the fix-up to a 32 bit word. */
+	for (entry--; *entry; entry--) {
+		/* Entries are signed 32 bit values; widen before offsetting. */
+		target = (unsigned long)((long)*entry + offset);
+		*(uint32_t *)target += fixup;
+	}
+
+	/* Inverse 32 bit relocations: subtract the fix-up instead. */
+	for (entry--; *entry; entry--) {
+		target = (unsigned long)((long)*entry + offset);
+		*(int32_t *)target -= fixup;
+	}
+
+	/* 64 bit relocations: add the fix-up to a 64 bit word. */
+	for (entry--; *entry; entry--) {
+		target = (unsigned long)((long)*entry + offset);
+		/* Words inside the PVH page tables are left untouched. */
+		if (!is_in_pvh_pgtable(target))
+			*(uint64_t *)target += fixup;
+	}
+}
+#endif
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 834c68b45f15..3b05807fe1dc 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -339,6 +339,24 @@  SECTIONS
 	}
 #endif
 
+#ifdef CONFIG_RELOCATABLE_UNCOMPRESSED_KERNEL
+	. = ALIGN(4);
+	.data.reloc : AT(ADDR(.data.reloc) - LOAD_OFFSET) {
+		__relocation_start = .;
+		/*
+		 * Space for the relocation table.
+		 * This needs to be filled so that the
+		 * relocs tool can overwrite the content.
+		 * Put a dummy data item at the start to
+		 * avoid generating a NOBITS section.
+		 */
+		LONG(0);
+		FILL(0);
+		. += CONFIG_RELOCATION_TABLE_SIZE - 4;
+		__relocation_end = .;
+	}
+#endif
+
 	/*
 	 * struct alt_inst entries. From the header (alternative.h):
 	 * "Alternative instructions for different CPU types or capabilities"