@@ -932,6 +932,7 @@ enum kvm_tdx_cmd_id {
KVM_TDX_INIT_VM,
KVM_TDX_INIT_VCPU,
KVM_TDX_INIT_MEM_REGION,
+ KVM_TDX_FINALIZE_VM,
KVM_TDX_GET_CPUID,
KVM_TDX_CMD_NR_MAX,
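For reference, a minimal userspace sketch of driving the new command. It assumes the struct kvm_tdx_cmd layout used by this series (id, flags, data, hw_error) and that TDX commands are issued through the KVM_MEMORY_ENCRYPT_OP ioctl on the VM fd; the retry loop follows the -EAGAIN contract that tdx_td_finalizemr() establishes below for TDX_OPERAND_BUSY.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: finalize the TD measurement, retrying while the TDX module
 * reports a busy resource (surfaced to userspace as -EAGAIN). */
static int tdx_finalize_vm(int vm_fd)
{
	struct kvm_tdx_cmd cmd;
	int r;

	memset(&cmd, 0, sizeof(cmd));
	cmd.id = KVM_TDX_FINALIZE_VM;

	do {
		r = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
	} while (r < 0 && errno == EAGAIN);

	if (r < 0)
		fprintf(stderr, "KVM_TDX_FINALIZE_VM: hw_error=0x%llx\n",
			(unsigned long long)cmd.hw_error);
	return r;
}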
@@ -563,6 +563,31 @@ static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
return 0;
}
+/*
+ * KVM_TDX_INIT_MEM_REGION calls kvm_gmem_populate() to get guest pages and
+ * tdx_gmem_post_populate() to premap guest pages into the private EPT.
+ * Mapping guest pages into the private EPT before the TD is finalized must
+ * use the SEAMCALL TDH.MEM.PAGE.ADD(), which copies the page content from a
+ * user-provided source page into the target guest page being added. That
+ * source page is not available via the common interface kvm_tdp_map_page(),
+ * so kvm_tdp_map_page() currently only premaps guest pages into the KVM
+ * mirrored root. The counter nr_premapped is incremented here to record the
+ * pending premap; it is decremented once TDH.MEM.PAGE.ADD() is issued after
+ * kvm_tdp_map_page() in tdx_gmem_post_populate().
+ */
+static int tdx_mem_page_record_premap_cnt(struct kvm *kvm, gfn_t gfn,
+ enum pg_level level, kvm_pfn_t pfn)
+{
+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+ if (KVM_BUG_ON(kvm->arch.pre_fault_allowed, kvm))
+ return -EINVAL;
+
+ /* nr_premapped will be decremented when tdh_mem_page_add() is called. */
+ atomic64_inc(&kvm_tdx->nr_premapped);
+ return 0;
+}
+
int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
enum pg_level level, kvm_pfn_t pfn)
{
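Together with the decrement in tdx_gmem_post_populate() and the zero-check in tdx_td_finalizemr() further down, the new counter enforces a single invariant: every premap must be balanced by a TDH.MEM.PAGE.ADD() before the TD can be finalized. A stripped-down model of that lifecycle (plain C11 with illustrative names, not kernel code):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_long nr_premapped;

/* Pre-finalize fault path: the page exists only in the mirror root. */
static void record_premap(void)
{
	atomic_fetch_add(&nr_premapped, 1);
}

/* tdx_gmem_post_populate() equivalent: TDH.MEM.PAGE.ADD has been issued. */
static void record_page_add(void)
{
	assert(atomic_load(&nr_premapped) > 0);
	atomic_fetch_sub(&nr_premapped, 1);
}

/* tdx_td_finalizemr() precondition: no premaps may still be pending. */
static bool finalize_allowed(void)
{
	return atomic_load(&nr_premapped) == 0;
}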
@@ -582,14 +607,15 @@ int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
*/
get_page(pfn_to_page(pfn));
+ /*
+ * Match the ordering of 'state' and 'pre_fault_allowed' stores in
+ * tdx_td_finalizemr(); see the smp_wmb() there.
+ */
+ smp_rmb();
if (likely(kvm_tdx->state == TD_STATE_RUNNABLE))
return tdx_mem_page_aug(kvm, gfn, level, pfn);
- /*
- * TODO: KVM_TDX_INIT_MEM_REGION support to populate before finalize
- * comes here for the initial memory.
- */
- return -EOPNOTSUPP;
+ return tdx_mem_page_record_premap_cnt(kvm, gfn, level, pfn);
}
static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
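The smp_rmb() above pairs with the smp_wmb() in tdx_td_finalizemr() below; it is the standard publication pattern, compressed here into one writer/reader pair for illustration (same identifiers as the patch, not new code):

/* Writer side, tdx_td_finalizemr(). */
static void publish_runnable(struct kvm *kvm, struct kvm_tdx *kvm_tdx)
{
	kvm_tdx->state = TD_STATE_RUNNABLE;	/* A: publish the payload */
	smp_wmb();				/* order A before B */
	kvm->arch.pre_fault_allowed = true;	/* B: publish the flag */
}

/* Reader side, fault path: observing B guarantees observing A. */
static bool observe_runnable(struct kvm *kvm, struct kvm_tdx *kvm_tdx)
{
	if (!kvm->arch.pre_fault_allowed)	/* flag not set: premap path */
		return false;
	smp_rmb();				/* order B's read before A's read */
	return kvm_tdx->state == TD_STATE_RUNNABLE;	/* always true here */
}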
@@ -621,10 +647,12 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
if (unlikely(kvm_tdx->state != TD_STATE_RUNNABLE &&
err == (TDX_EPT_WALK_FAILED | TDX_OPERAND_ID_RCX))) {
/*
- * This page was mapped with KVM_MAP_MEMORY, but
- * KVM_TDX_INIT_MEM_REGION is not issued yet.
+ * The page was premapped by KVM_TDX_INIT_MEM_REGION, but
+ * tdh_mem_page_add() has not been issued for it yet.
*/
if (!is_last_spte(entry, level) || !(entry & VMX_EPT_RWX_MASK)) {
+ WARN_ON_ONCE(!atomic64_read(&kvm_tdx->nr_premapped));
+ atomic64_dec(&kvm_tdx->nr_premapped);
tdx_unpin(kvm, pfn);
return 0;
}
@@ -1368,6 +1396,36 @@ void tdx_flush_tlb_all(struct kvm_vcpu *vcpu)
ept_sync_global();
}
+static int tdx_td_finalizemr(struct kvm *kvm, struct kvm_tdx_cmd *cmd)
+{
+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+ guard(mutex)(&kvm->slots_lock);
+
+ if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE)
+ return -EINVAL;
+ /*
+ * Premapped pages are still pending: KVM_TDX_INIT_MEM_REGION has not
+ * yet issued TDH.MEM.PAGE.ADD() for them.
+ */
+ if (atomic64_read(&kvm_tdx->nr_premapped))
+ return -EINVAL;
+
+ cmd->hw_error = tdh_mr_finalize(kvm_tdx->tdr_pa);
+ if ((cmd->hw_error & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_BUSY)
+ return -EAGAIN;
+ if (KVM_BUG_ON(cmd->hw_error, kvm)) {
+ pr_tdx_error(TDH_MR_FINALIZE, cmd->hw_error);
+ return -EIO;
+ }
+
+ kvm_tdx->state = TD_STATE_RUNNABLE;
+ /* Set TD_STATE_RUNNABLE before 'pre_fault_allowed'; pairs with smp_rmb(). */
+ smp_wmb();
+ kvm->arch.pre_fault_allowed = true;
+ return 0;
+}
+
int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
{
struct kvm_tdx_cmd tdx_cmd;
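tdx_td_finalizemr() uses the cleanup.h scope guard, so there is no explicit unlock. For readers new to guard(), its locking is roughly equivalent to the open-coded sketch below, where __tdx_td_finalizemr() is a hypothetical helper containing the function body shown above:

static int tdx_td_finalizemr_opencoded(struct kvm *kvm,
				       struct kvm_tdx_cmd *cmd)
{
	int r;

	mutex_lock(&kvm->slots_lock);
	r = __tdx_td_finalizemr(kvm, cmd);	/* hypothetical body helper */
	mutex_unlock(&kvm->slots_lock);		/* guard() emits this on every
						 * return path automatically */
	return r;
}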
@@ -1392,6 +1450,9 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
case KVM_TDX_INIT_VM:
r = tdx_td_init(kvm, &tdx_cmd);
break;
+ case KVM_TDX_FINALIZE_VM:
+ r = tdx_td_finalizemr(kvm, &tdx_cmd);
+ break;
default:
r = -EINVAL;
goto out;
@@ -1656,6 +1717,9 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
goto out;
}
+ WARN_ON_ONCE(!atomic64_read(&kvm_tdx->nr_premapped));
+ atomic64_dec(&kvm_tdx->nr_premapped);
+
if (arg->flags & KVM_TDX_MEASURE_MEMORY_REGION) {
for (i = 0; i < PAGE_SIZE; i += TDX_EXTENDMR_CHUNKSIZE) {
err = tdh_mr_extend(kvm_tdx->tdr_pa, gpa + i, &entry,
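tdh_mr_extend() folds the page contents into the TD measurement in 256-byte chunks (TDX_EXTENDMR_CHUNKSIZE), and runs only when userspace opts in. A sketch of that opt-in from the userspace side, assuming the kvm_tdx_init_mem_region layout used by this series (source_addr, gpa, nr_pages) and the KVM_TDX_MEASURE_MEMORY_REGION flag; whether the command goes to the VM or vCPU fd depends on the series revision:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: add initial TD memory from 'src' at 'gpa' and extend the
 * measurement over it. */
static int tdx_init_mem_region(int fd, void *src, __u64 gpa, __u64 npages)
{
	struct kvm_tdx_init_mem_region region = {
		.source_addr = (__u64)(unsigned long)src,
		.gpa = gpa,
		.nr_pages = npages,
	};
	struct kvm_tdx_cmd cmd = {
		.id = KVM_TDX_INIT_MEM_REGION,
		.flags = KVM_TDX_MEASURE_MEMORY_REGION,
		.data = (__u64)(unsigned long)&region,
	};

	return ioctl(fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}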
@@ -32,6 +32,9 @@ struct kvm_tdx {
u64 tsc_offset;
enum kvm_tdx_state state;
+
+ /* Pages premapped by KVM_TDX_INIT_MEM_REGION, pending TDH.MEM.PAGE.ADD(). */
+ atomic64_t nr_premapped;
};
/* TDX module vCPU states */