Message ID | 31552f714f2fd8178f9467e9afaaf28ba3de3c7b.1699368363.git.isaku.yamahata@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM TDX: TDP MMU: large page support | expand |
On 11/7/2023 11:00 PM, isaku.yamahata@intel.com wrote:
> From: Xiaoyao Li <xiaoyao.li@intel.com>
>
> A 2MB large page can be tdh_mem_page_aug()'ed to TD directly. In this case,
> it needs to reclaim and clear the page as 2MB size.
>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>  arch/x86/kvm/vmx/tdx.c | 27 +++++++++++++++------------
>  1 file changed, 15 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index a728175c4a6d..0fca863faeee 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -200,12 +200,13 @@ static void tdx_disassociate_vp_on_cpu(struct kvm_vcpu *vcpu)
>          smp_call_function_single(cpu, tdx_disassociate_vp_arg, vcpu, 1);
>  }
>
> -static void tdx_clear_page(unsigned long page_pa)
> +static void tdx_clear_page(unsigned long page_pa, int size)

Should use "unsigned long" instead of "int" for size to avoid implicit type conversion.

>  {
>          const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
>          void *page = __va(page_pa);
>          unsigned long i;
>
> +        WARN_ON_ONCE(size % PAGE_SIZE);
>          /*
>           * When re-assign one page from old keyid to a new keyid, MOVDIR64B is
>           * required to clear/write the page with new keyid to prevent integrity
> @@ -214,7 +215,7 @@ static void tdx_clear_page(unsigned long page_pa)
>           * clflush doesn't flush cache with HKID set. The cache line could be
>           * poisoned (even without MKTME-i), clear the poison bit.
>           */
> -        for (i = 0; i < PAGE_SIZE; i += 64)
> +        for (i = 0; i < size; i += 64)
>                  movdir64b(page + i, zero_page);
>          /*
>           * MOVDIR64B store uses WC buffer. Prevent following memory reads
> @@ -223,7 +224,7 @@ static void tdx_clear_page(unsigned long page_pa)
>          __mb();
>  }
>
> -static int __tdx_reclaim_page(hpa_t pa)
> +static int __tdx_reclaim_page(hpa_t pa, enum pg_level level)
>  {
>          struct tdx_module_args out;
>          u64 err;
> @@ -241,17 +242,19 @@ static int __tdx_reclaim_page(hpa_t pa)
>                  pr_tdx_error(TDH_PHYMEM_PAGE_RECLAIM, err, &out);
>                  return -EIO;
>          }
> +        /* out.r8 == tdx sept page level */
> +        WARN_ON_ONCE(out.r8 != pg_level_to_tdx_sept_level(level));
>
>          return 0;
>  }
>
> -static int tdx_reclaim_page(hpa_t pa)
> +static int tdx_reclaim_page(hpa_t pa, enum pg_level level)
>  {
>          int r;
>
> -        r = __tdx_reclaim_page(pa);
> +        r = __tdx_reclaim_page(pa, level);
>          if (!r)
> -                tdx_clear_page(pa);
> +                tdx_clear_page(pa, KVM_HPAGE_SIZE(level));
>          return r;
>  }
>
> @@ -265,7 +268,7 @@ static void tdx_reclaim_td_page(unsigned long td_page_pa)
>           * was already flushed by TDH.PHYMEM.CACHE.WB before here, So
>           * cache doesn't need to be flushed again.
>           */
> -        if (tdx_reclaim_page(td_page_pa))
> +        if (tdx_reclaim_page(td_page_pa, PG_LEVEL_4K))
>                  /*
>                   * Leak the page on failure:
>                   * tdx_reclaim_page() returns an error if and only if there's an
> @@ -497,7 +500,7 @@ void tdx_vm_free(struct kvm *kvm)
>
>          if (!kvm_tdx->tdr_pa)
>                  return;
> -        if (__tdx_reclaim_page(kvm_tdx->tdr_pa))
> +        if (__tdx_reclaim_page(kvm_tdx->tdr_pa, PG_LEVEL_4K))
>                  return;
>          /*
>           * TDX module maps TDR with TDX global HKID. TDX module may access TDR
> @@ -510,7 +513,7 @@ void tdx_vm_free(struct kvm *kvm)
>                  pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
>                  return;
>          }
> -        tdx_clear_page(kvm_tdx->tdr_pa);
> +        tdx_clear_page(kvm_tdx->tdr_pa, PAGE_SIZE);
>
>          free_page((unsigned long)__va(kvm_tdx->tdr_pa));
>          kvm_tdx->tdr_pa = 0;
> @@ -1597,7 +1600,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
>                   * The HKID assigned to this TD was already freed and cache
>                   * was already flushed. We don't have to flush again.
>                   */
> -                err = tdx_reclaim_page(hpa);
> +                err = tdx_reclaim_page(hpa, level);
>                  if (KVM_BUG_ON(err, kvm))
>                          return -EIO;
>                  tdx_unpin(kvm, pfn);
> @@ -1630,7 +1633,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
>                  pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
>                  return -EIO;
>          }
> -        tdx_clear_page(hpa);
> +        tdx_clear_page(hpa, PAGE_SIZE);

Should here be KVM_HPAGE_SIZE(level) instead of PAGE_SIZE?

>          tdx_unpin(kvm, pfn);
>          return 0;
>  }
> @@ -1742,7 +1745,7 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
>           * already flushed. We don't have to flush again.
>           */
>          if (!is_hkid_assigned(kvm_tdx))
> -                return tdx_reclaim_page(__pa(private_spt));
> +                return tdx_reclaim_page(__pa(private_spt), PG_LEVEL_4K);
>
>          /*
>           * free_private_spt() is (obviously) called when a shadow page is being
On 11/19/2023 2:42 PM, Binbin Wu wrote:
>
>
> On 11/7/2023 11:00 PM, isaku.yamahata@intel.com wrote:
>> @@ -1597,7 +1600,7 @@ static int tdx_sept_drop_private_spte(struct
>> kvm *kvm, gfn_t gfn,
>>                   * The HKID assigned to this TD was already freed and cache
>>                   * was already flushed. We don't have to flush again.
>>                   */
>> -                err = tdx_reclaim_page(hpa);
>> +                err = tdx_reclaim_page(hpa, level);
>>                  if (KVM_BUG_ON(err, kvm))
>>                          return -EIO;
>>                  tdx_unpin(kvm, pfn);
>> @@ -1630,7 +1633,7 @@ static int tdx_sept_drop_private_spte(struct
>> kvm *kvm, gfn_t gfn,
>>                  pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
>>                  return -EIO;
>>          }
>> -        tdx_clear_page(hpa);
>> +        tdx_clear_page(hpa, PAGE_SIZE);
> Should here be KVM_HPAGE_SIZE(level) instead of PAGE_SIZE?

OK, please ignore this comment; I see this is handled by the following patch.

>
>>          tdx_unpin(kvm, pfn);
>>          return 0;
>>  }
>> @@ -1742,7 +1745,7 @@ static int tdx_sept_free_private_spt(struct kvm
>> *kvm, gfn_t gfn,
>>           * already flushed. We don't have to flush again.
>>           */
>>          if (!is_hkid_assigned(kvm_tdx))
>> -                return tdx_reclaim_page(__pa(private_spt));
>> +                return tdx_reclaim_page(__pa(private_spt), PG_LEVEL_4K);
>>          /*
>>           * free_private_spt() is (obviously) called when a shadow page
>> is being
>
>
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index a728175c4a6d..0fca863faeee 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -200,12 +200,13 @@ static void tdx_disassociate_vp_on_cpu(struct kvm_vcpu *vcpu) smp_call_function_single(cpu, tdx_disassociate_vp_arg, vcpu, 1); } -static void tdx_clear_page(unsigned long page_pa) +static void tdx_clear_page(unsigned long page_pa, int size) { const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); void *page = __va(page_pa); unsigned long i; + WARN_ON_ONCE(size % PAGE_SIZE); /* * When re-assign one page from old keyid to a new keyid, MOVDIR64B is * required to clear/write the page with new keyid to prevent integrity @@ -214,7 +215,7 @@ static void tdx_clear_page(unsigned long page_pa) * clflush doesn't flush cache with HKID set. The cache line could be * poisoned (even without MKTME-i), clear the poison bit. */ - for (i = 0; i < PAGE_SIZE; i += 64) + for (i = 0; i < size; i += 64) movdir64b(page + i, zero_page); /* * MOVDIR64B store uses WC buffer. Prevent following memory reads @@ -223,7 +224,7 @@ static void tdx_clear_page(unsigned long page_pa) __mb(); } -static int __tdx_reclaim_page(hpa_t pa) +static int __tdx_reclaim_page(hpa_t pa, enum pg_level level) { struct tdx_module_args out; u64 err; @@ -241,17 +242,19 @@ static int __tdx_reclaim_page(hpa_t pa) pr_tdx_error(TDH_PHYMEM_PAGE_RECLAIM, err, &out); return -EIO; } + /* out.r8 == tdx sept page level */ + WARN_ON_ONCE(out.r8 != pg_level_to_tdx_sept_level(level)); return 0; } -static int tdx_reclaim_page(hpa_t pa) +static int tdx_reclaim_page(hpa_t pa, enum pg_level level) { int r; - r = __tdx_reclaim_page(pa); + r = __tdx_reclaim_page(pa, level); if (!r) - tdx_clear_page(pa); + tdx_clear_page(pa, KVM_HPAGE_SIZE(level)); return r; } @@ -265,7 +268,7 @@ static void tdx_reclaim_td_page(unsigned long td_page_pa) * was already flushed by TDH.PHYMEM.CACHE.WB before here, So * cache doesn't need to be flushed again. 
*/ - if (tdx_reclaim_page(td_page_pa)) + if (tdx_reclaim_page(td_page_pa, PG_LEVEL_4K)) /* * Leak the page on failure: * tdx_reclaim_page() returns an error if and only if there's an @@ -497,7 +500,7 @@ void tdx_vm_free(struct kvm *kvm) if (!kvm_tdx->tdr_pa) return; - if (__tdx_reclaim_page(kvm_tdx->tdr_pa)) + if (__tdx_reclaim_page(kvm_tdx->tdr_pa, PG_LEVEL_4K)) return; /* * TDX module maps TDR with TDX global HKID. TDX module may access TDR @@ -510,7 +513,7 @@ void tdx_vm_free(struct kvm *kvm) pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL); return; } - tdx_clear_page(kvm_tdx->tdr_pa); + tdx_clear_page(kvm_tdx->tdr_pa, PAGE_SIZE); free_page((unsigned long)__va(kvm_tdx->tdr_pa)); kvm_tdx->tdr_pa = 0; @@ -1597,7 +1600,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn, * The HKID assigned to this TD was already freed and cache * was already flushed. We don't have to flush again. */ - err = tdx_reclaim_page(hpa); + err = tdx_reclaim_page(hpa, level); if (KVM_BUG_ON(err, kvm)) return -EIO; tdx_unpin(kvm, pfn); @@ -1630,7 +1633,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn, pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL); return -EIO; } - tdx_clear_page(hpa); + tdx_clear_page(hpa, PAGE_SIZE); tdx_unpin(kvm, pfn); return 0; } @@ -1742,7 +1745,7 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn, * already flushed. We don't have to flush again. */ if (!is_hkid_assigned(kvm_tdx)) - return tdx_reclaim_page(__pa(private_spt)); + return tdx_reclaim_page(__pa(private_spt), PG_LEVEL_4K); /* * free_private_spt() is (obviously) called when a shadow page is being