@@ -775,20 +775,6 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
&level_state);
} while (unlikely(tdx_operand_busy(err)));
- if (unlikely(kvm_tdx->state != TD_STATE_RUNNABLE &&
- err == (TDX_EPT_WALK_FAILED | TDX_OPERAND_ID_RCX))) {
- /*
- * Page is mapped by KVM_TDX_INIT_MEM_REGION, but hasn't called
- * tdh_mem_page_add().
- */
- if ((!is_last_spte(entry, level) || !(entry & VMX_EPT_RWX_MASK)) &&
- !KVM_BUG_ON(!atomic64_read(&kvm_tdx->nr_premapped), kvm)) {
- atomic64_dec(&kvm_tdx->nr_premapped);
- tdx_unpin(kvm, page);
- return 0;
- }
- }
-
if (KVM_BUG_ON(err, kvm)) {
pr_tdx_error_2(TDH_MEM_PAGE_REMOVE, err, entry, level_state);
return -EIO;
@@ -826,8 +812,41 @@ int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
return 0;
}
+/*
+ * Check if the error returned from a SEPT zap SEAMCALL is due to a page being
+ * mapped by KVM_TDX_INIT_MEM_REGION without tdh_mem_page_add() having been
+ * called successfully on it.
+ *
+ * Since tdh_mem_sept_add() must have succeeded before a non-leaf entry can be
+ * present in the mirrored page table, the SEPT zap related SEAMCALLs should
+ * never return TDX_EPT_WALK_FAILED. They should instead return
+ * TDX_EPT_ENTRY_STATE_INCORRECT upon finding an empty leaf entry in the
+ * SEPT.
+ *
+ * Additionally, check whether the entry returned by the SEPT walk has RWX
+ * permissions, to filter out anything unexpected.
+ *
+ * Note: @level is a pg_level, not a tdx_level. The tdx_level extracted from
+ * the level_state returned on a SEAMCALL error is the same as the level that
+ * was passed into the SEAMCALL.
+ */
+static bool tdx_is_sept_zap_err_due_to_premap(struct kvm_tdx *kvm_tdx, u64 err,
+ u64 entry, int level)
+{
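+ /* Premapped pages can only exist before the TD is finalized and runnable. */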
+ if (!err || kvm_tdx->state == TD_STATE_RUNNABLE)
+ return false;
+
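+ /* An empty leaf entry yields ENTRY_STATE_INCORRECT, never WALK_FAILED. */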
+ if (err != (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX))
+ return false;
+
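+ /* A present leaf entry with RWX set was genuinely mapped, not premapped. */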
+ if (is_last_spte(entry, level) && (entry & VMX_EPT_RWX_MASK))
+ return false;
+
+ return true;
+}
+
static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
- enum pg_level level)
+ enum pg_level level, struct page *page)
{
int tdx_level = pg_level_to_tdx_sept_level(level);
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
@@ -840,11 +859,19 @@ static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
if (unlikely(tdx_operand_busy(err)))
return -EBUSY;
+
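+ /*
+ * The zap failed because the page was mapped by KVM_TDX_INIT_MEM_REGION
+ * but tdh_mem_page_add() never succeeded on it. Drop the premap count
+ * and unpin the page; there is nothing in the SEPT to block.
+ */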
+ if (tdx_is_sept_zap_err_due_to_premap(kvm_tdx, err, entry, level) &&
+ !KVM_BUG_ON(!atomic64_read(&kvm_tdx->nr_premapped), kvm)) {
+ atomic64_dec(&kvm_tdx->nr_premapped);
+ tdx_unpin(kvm, page);
+ return 0;
+ }
+
if (KVM_BUG_ON(err, kvm)) {
pr_tdx_error_2(TDH_MEM_RANGE_BLOCK, err, entry, level_state);
return -EIO;
}
- return 0;
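+ /* Return 1 to tell the caller to proceed with TLB tracking and removal. */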
+ return 1;
}
/*
@@ -918,6 +945,7 @@ int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
enum pg_level level, kvm_pfn_t pfn)
{
+ struct page *page = pfn_to_page(pfn);
int ret;
/*
@@ -928,8 +956,8 @@ int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
if (KVM_BUG_ON(!is_hkid_assigned(to_kvm_tdx(kvm)), kvm))
return -EINVAL;
- ret = tdx_sept_zap_private_spte(kvm, gfn, level);
- if (ret)
+ ret = tdx_sept_zap_private_spte(kvm, gfn, level, page);
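+ /* ret == 0: the premapped page was already unpinned; ret < 0: error. */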
+ if (ret <= 0)
return ret;
/*
@@ -938,7 +966,7 @@ int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
*/
tdx_track(kvm);
- return tdx_sept_drop_private_spte(kvm, gfn, level, pfn_to_page(pfn));
+ return tdx_sept_drop_private_spte(kvm, gfn, level, page);
}
static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)