@@ -1971,6 +1971,7 @@ config INTEL_TDX_HOST
depends on X86_64
depends on KVM_INTEL
depends on X86_X2APIC
+ select ARCH_KEEP_MEMBLOCK
help
Intel Trust Domain Extensions (TDX) protects guest VMs from malicious
host and certain physical attacks. This option enables necessary TDX
@@ -1033,6 +1033,8 @@ void __init setup_arch(char **cmdline_p)
*
* Moreover, on machines with SandyBridge graphics or in setups that use
* crashkernel the entire 1M is reserved anyway.
+ *
+ * Note the host kernel TDX also requires the first 1MB being reserved.
*/
x86_platform.realmode_reserve();
@@ -16,6 +16,12 @@
#include <linux/spinlock.h>
#include <linux/percpu-defs.h>
#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/memblock.h>
+#include <linux/memory.h>
+#include <linux/minmax.h>
+#include <linux/sizes.h>
+#include <linux/pfn.h>
#include <asm/msr-index.h>
#include <asm/msr.h>
#include <asm/tdx.h>
@@ -30,6 +36,9 @@ static DEFINE_PER_CPU(bool, tdx_lp_initialized);
static enum tdx_module_status_t tdx_module_status;
static DEFINE_MUTEX(tdx_module_lock);
+/* All TDX-usable memory regions. Protected by mem_hotplug_lock. */
+static LIST_HEAD(tdx_memlist);
+
typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);
static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
@@ -153,12 +162,102 @@ int tdx_cpu_enable(void)
}
EXPORT_SYMBOL_GPL(tdx_cpu_enable);
+/*
+ * Add a memory region as a TDX memory block. The caller must make sure
+ * all memory regions are added in address ascending order and don't
+ * overlap.
+ */
+static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ struct tdx_memblock *tmb;
+
+ tmb = kmalloc(sizeof(*tmb), GFP_KERNEL);
+ if (!tmb)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tmb->list);
+ tmb->start_pfn = start_pfn;
+ tmb->end_pfn = end_pfn;
+
+ /* @tmb_list is protected by mem_hotplug_lock */
+ list_add_tail(&tmb->list, tmb_list);
+ return 0;
+}
+
+static void free_tdx_memlist(struct list_head *tmb_list)
+{
+ /* @tmb_list is protected by mem_hotplug_lock */
+ while (!list_empty(tmb_list)) {
+ struct tdx_memblock *tmb = list_first_entry(tmb_list,
+ struct tdx_memblock, list);
+
+ list_del(&tmb->list);
+ kfree(tmb);
+ }
+}
+
+/*
+ * Ensure that all memblock memory regions are convertible to TDX
+ * memory. Once this has been established, stash the memblock
+ * ranges off in a secondary structure because memblock is modified
+ * in memory hotplug while TDX memory regions are fixed.
+ */
+static int build_tdx_memlist(struct list_head *tmb_list)
+{
+ unsigned long start_pfn, end_pfn;
+ int i, ret;
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
+ /*
+ * The first 1MB is not reported as TDX convertible memory.
+ * Although the first 1MB is always reserved and won't end up
+ * to the page allocator, it is still in memblock's memory
+ * regions. Skip them manually to exclude them as TDX memory.
+ */
+ start_pfn = max(start_pfn, PHYS_PFN(SZ_1M));
+ if (start_pfn >= end_pfn)
+ continue;
+
+ /*
+ * Add the memory regions as TDX memory. The regions in
+ * memblock has already guaranteed they are in address
+ * ascending order and don't overlap.
+ */
+ ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+err:
+ free_tdx_memlist(tmb_list);
+ return ret;
+}
+
static int init_tdx_module(void)
{
+ int ret;
+
+ /*
+ * To keep things simple, assume that all TDX-protected memory
+ * will come from the page allocator. Make sure all pages in the
+ * page allocator are TDX-usable memory.
+ *
+ * Build the list of "TDX-usable" memory regions which cover all
+ * pages in the page allocator to guarantee that. Do it while
+ * holding mem_hotplug_lock read-lock as the memory hotplug code
+ * path reads the @tdx_memlist to reject any new memory.
+ */
+ get_online_mems();
+
+ ret = build_tdx_memlist(&tdx_memlist);
+ if (ret)
+ goto out_put_tdxmem;
+
/*
* TODO:
*
- * - Build the list of TDX-usable memory regions.
* - Get TDX module "TD Memory Region" (TDMR) global metadata.
* - Construct a list of TDMRs to cover all TDX-usable memory
* regions.
@@ -168,7 +267,14 @@ static int init_tdx_module(void)
*
* Return error before all steps are done.
*/
- return -EINVAL;
+ ret = -EINVAL;
+out_put_tdxmem:
+ /*
+ * @tdx_memlist is written here and read at memory hotplug time.
+ * Lock out memory hotplug code while building it.
+ */
+ put_online_mems();
+ return ret;
}
static int __tdx_enable(void)
@@ -258,6 +364,56 @@ static int __init record_keyid_partitioning(u32 *tdx_keyid_start,
return 0;
}
+static bool is_tdx_memory(unsigned long start_pfn, unsigned long end_pfn)
+{
+ struct tdx_memblock *tmb;
+
+ /*
+ * This check assumes that the start_pfn<->end_pfn range does not
+ * cross multiple @tdx_memlist entries. A single memory online
+ * event across multiple memblocks (from which @tdx_memlist
+ * entries are derived at the time of module initialization) is
+ * not possible. This is because memory offline/online is done
+ * on granularity of 'struct memory_block', and the hotpluggable
+ * memory region (one memblock) must be multiple of memory_block.
+ */
+ list_for_each_entry(tmb, &tdx_memlist, list) {
+ if (start_pfn >= tmb->start_pfn && end_pfn <= tmb->end_pfn)
+ return true;
+ }
+ return false;
+}
+
+static int tdx_memory_notifier(struct notifier_block *nb, unsigned long action,
+ void *v)
+{
+ struct memory_notify *mn = v;
+
+ if (action != MEM_GOING_ONLINE)
+ return NOTIFY_OK;
+
+ /*
+ * Empty list means TDX isn't enabled. Allow any memory
+ * to go online.
+ */
+ if (list_empty(&tdx_memlist))
+ return NOTIFY_OK;
+
+ /*
+ * The TDX memory configuration is static and can not be
+ * changed. Reject onlining any memory which is outside of
+ * the static configuration whether it supports TDX or not.
+ */
+ if (is_tdx_memory(mn->start_pfn, mn->start_pfn + mn->nr_pages))
+ return NOTIFY_OK;
+
+ return NOTIFY_BAD;
+}
+
+static struct notifier_block tdx_memory_nb = {
+ .notifier_call = tdx_memory_notifier,
+};
+
static int __init tdx_init(void)
{
u32 tdx_keyid_start, nr_tdx_keyids;
@@ -281,6 +437,13 @@ static int __init tdx_init(void)
return -ENODEV;
}
+ err = register_memory_notifier(&tdx_memory_nb);
+ if (err) {
+ pr_err("initialization failed: register_memory_notifier() failed (%d)\n",
+ err);
+ return -ENODEV;
+ }
+
/*
* Just use the first TDX KeyID as the 'global KeyID' and
* leave the rest for TDX guests.
@@ -27,4 +27,10 @@ enum tdx_module_status_t {
TDX_MODULE_ERROR
};
+struct tdx_memblock {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+};
+
#endif