Message ID | 5d2307efb227b927cc9fa3e18787fde8e1cb13e2.1708933498.git.isaku.yamahata@intel.com
---|---
State | New, archived
Series | [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro
On Mon, 2024-02-26 at 00:25 -0800, isaku.yamahata@intel.com wrote:
> To handle private page tables, an is_private argument needs to be
> passed down. Given that the page level is already passed down, it
> would be cumbersome to add one more parameter about the sp. Instead,
> replace the level argument with union kvm_mmu_page_role. Thus the
> number of arguments won't increase, and more info about the sp can be
> passed down.
>
> For a private sp, a secure page table will also be allocated in
> addition to struct kvm_mmu_page and the page table (spt member). The
> allocation functions (tdp_mmu_alloc_sp() and
> __tdp_mmu_alloc_sp_for_split()) need to know whether the allocation is
> for a conventional page table or a private page table. Pass union
> kvm_mmu_page_role to those functions and initialize the role member of
> struct kvm_mmu_page.

tdp_mmu_alloc_sp() is only called in two places: one for the root, and
one for the mid-level tables.

In later patches, when the kvm_mmu_alloc_private_spt() part is added,
the root case doesn't need anything done. So the code has to take
special care in tdp_mmu_alloc_sp() to avoid doing anything for the
root. It only needs to do the special private spt allocation in the
non-root case. If we open code that case, I think maybe we could drop
this patch, like the below.

The benefits are to drop this patch (which looks to already be part of
Paolo's series) and simplify "KVM: x86/mmu: Add a private pointer to
struct kvm_mmu_page". I'm not sure though; what do you think? Only
build tested.

diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index f1533a753974..d6c2ee8bb636 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -176,30 +176,12 @@ static inline void kvm_mmu_init_private_spt(struct kvm_mmu_page *sp, void *private_spt
 
 static inline void kvm_mmu_alloc_private_spt(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
-	bool is_root = vcpu->arch.root_mmu.root_role.level == sp->role.level;
-
-	KVM_BUG_ON(!kvm_mmu_page_role_is_private(sp->role), vcpu->kvm);
-	if (is_root)
-		/*
-		 * Because TDX module assigns root Secure-EPT page and set it to
-		 * Secure-EPTP when TD vcpu is created, secure page table for
-		 * root isn't needed.
-		 */
-		sp->private_spt = NULL;
-	else {
-		/*
-		 * Because the TDX module doesn't trust VMM and initializes
-		 * the pages itself, KVM doesn't initialize them.  Allocate
-		 * pages with garbage and give them to the TDX module.
-		 */
-		sp->private_spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_private_spt_cache);
-		/*
-		 * Because mmu_private_spt_cache is topped up before starting
-		 * kvm page fault resolving, the allocation above shouldn't
-		 * fail.
-		 */
-		WARN_ON_ONCE(!sp->private_spt);
-	}
+	/*
+	 * Because the TDX module doesn't trust VMM and initializes
+	 * the pages itself, KVM doesn't initialize them.  Allocate
+	 * pages with garbage and give them to the TDX module.
+	 */
+	sp->private_spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_private_spt_cache);
 }
 
 static inline gfn_t kvm_gfn_for_root(struct kvm *kvm, struct kvm_mmu_page *root,
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index ac7bf37b353f..f423a38019fb 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -195,9 +195,6 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
 	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
 	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
 
-	if (kvm_mmu_page_role_is_private(role))
-		kvm_mmu_alloc_private_spt(vcpu, sp);
-
 	return sp;
 }
 
@@ -1378,6 +1375,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		 * needs to be split.
 		 */
 		sp = tdp_mmu_alloc_sp(vcpu);
+		if (!(raw_gfn & kvm_gfn_shared_mask(kvm)))
+			kvm_mmu_alloc_private_spt(vcpu, sp);
 		tdp_mmu_init_child_sp(sp, &iter);
 
 		sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
@@ -1670,7 +1669,6 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(struct kvm *kvm, gfp_t
 	sp->spt = (void *)__get_free_page(gfp);
 	/* TODO: large page support for private GPA. */
-	WARN_ON_ONCE(kvm_mmu_page_role_is_private(role));
 	if (!sp->spt) {
 		kmem_cache_free(mmu_page_header_cache, sp);
 		return NULL;
@@ -1686,10 +1684,6 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 	struct kvm_mmu_page *sp;
 
 	kvm_lockdep_assert_mmu_lock_held(kvm, shared);
-	KVM_BUG_ON(kvm_mmu_page_role_is_private(role) !=
-		   is_private_sptep(iter->sptep), kvm);
-	/* TODO: Large page isn't supported for private SPTE yet. */
-	KVM_BUG_ON(kvm_mmu_page_role_is_private(role), kvm);
 
 	/*
 	 * Since we are allocating while under the MMU lock we have to be
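One detail about the simplified kvm_mmu_alloc_private_spt() above:
dropping the WARN_ON_ONCE(!sp->private_spt) is safe because
kvm_mmu_memory_cache_alloc() never returns NULL once the cache has been
topped up. A sketch of that helper, simplified from virt/kvm/kvm_main.c
of roughly this era (the exact body may differ across kernel versions):

void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *p;

	if (WARN_ON(!mc->nobjs))
		/* Cache wasn't topped up; fall back to an atomic allocation. */
		p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
	else
		p = mc->objects[--mc->nobjs];
	BUG_ON(!p);
	return p;
}

Since the fault path tops up mmu_private_spt_cache before walking the
page tables, the allocation in kvm_mmu_alloc_private_spt() cannot fail,
which is exactly why the extra WARN was redundant.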
On Thu, Mar 21, 2024 at 12:11:11AM +0000,
"Edgecombe, Rick P" <rick.p.edgecombe@intel.com> wrote:
> On Mon, 2024-02-26 at 00:25 -0800, isaku.yamahata@intel.com wrote:
> > [...]
>
> tdp_mmu_alloc_sp() is only called in two places: one for the root, and
> one for the mid-level tables.
>
> In later patches, when the kvm_mmu_alloc_private_spt() part is added,
> the root case doesn't need anything done. So the code has to take
> special care in tdp_mmu_alloc_sp() to avoid doing anything for the
> root. It only needs to do the special private spt allocation in the
> non-root case. If we open code that case, I think maybe we could drop
> this patch, like the below.
>
> The benefits are to drop this patch (which looks to already be part of
> Paolo's series) and simplify "KVM: x86/mmu: Add a private pointer to
> struct kvm_mmu_page". I'm not sure though; what do you think? Only
> build tested.

Makes sense. Until v18, there was a config to disable the private mmu
part at compile time; those functions had #ifdefs in mmu_internal.h.
v19 dropped the config based on this feedback:
https://lore.kernel.org/kvm/Zcrarct88veirZx7@google.com/

After looking at mmu_internal.h, I think the following four functions
could be open coded: kvm_mmu_private_spt(), kvm_mmu_init_private_spt(),
kvm_mmu_alloc_private_spt(), and kvm_mmu_free_private_spt().
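For reference, the first two helpers Isaku lists are thin wrappers
around the private_spt member. A sketch of their likely shape, inferred
from the diff context above (kvm_mmu_init_private_spt()'s signature is
visible in the first hunk); the actual bodies in the series may differ:

static inline void *kvm_mmu_private_spt(struct kvm_mmu_page *sp)
{
	return sp->private_spt;
}

static inline void kvm_mmu_init_private_spt(struct kvm_mmu_page *sp,
					    void *private_spt)
{
	sp->private_spt = private_spt;
}

Open coding wrappers like these replaces each call site with a one-line
member access, shrinking the TDX-specific surface in mmu_internal.h to
the private_spt field itself.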
On Thu, Mar 21, 2024 at 02:24:12PM -0700, Isaku Yamahata wrote:
> On Thu, Mar 21, 2024 at 12:11:11AM +0000,
> "Edgecombe, Rick P" <rick.p.edgecombe@intel.com> wrote:
>
>> On Mon, 2024-02-26 at 00:25 -0800, isaku.yamahata@intel.com wrote:
>> > [...]
>>
>> tdp_mmu_alloc_sp() is only called in two places: one for the root,
>> and one for the mid-level tables.
>>
>> In later patches, when the kvm_mmu_alloc_private_spt() part is added,
>> the root case doesn't need anything done. So the code has to take
>> special care in tdp_mmu_alloc_sp() to avoid doing anything for the
>> root. It only needs to do the special private spt allocation in the
>> non-root case. If we open code that case, I think maybe we could drop
>> this patch, like the below.
>>
>> The benefits are to drop this patch (which looks to already be part
>> of Paolo's series) and simplify "KVM: x86/mmu: Add a private pointer
>> to struct kvm_mmu_page". I'm not sure though; what do you think?
>> Only build tested.
>
> Makes sense. Until v18, there was a config to disable the private mmu
> part at compile time; those functions had #ifdefs in mmu_internal.h.
> v19 dropped the config based on this feedback:
> https://lore.kernel.org/kvm/Zcrarct88veirZx7@google.com/
>
> After looking at mmu_internal.h, I think the following four functions
> could be open coded: kvm_mmu_private_spt(), kvm_mmu_init_private_spt(),
> kvm_mmu_alloc_private_spt(), and kvm_mmu_free_private_spt().

It took me a few minutes to figure out why the mirror root page doesn't
need a private_spt.

Per the TDX module spec:

  Secure EPT’s root page (EPML4 or EPML5, depending on whether the host
  VMM uses 4-level or 5-level EPT) does not need to be explicitly added.
  It is created during TD initialization (TDH.MNG.INIT) and is stored as
  part of TDCS.

I suggest adding the above as a comment somewhere even if we decide to
open-code kvm_mmu_alloc_private_spt().

IMO, some TDX details bleed into KVM MMU regardless of whether we
open-code kvm_mmu_alloc_private_spt() or not. This isn't good, though I
cannot think of a better solution.
On Fri, Mar 22, 2024 at 03:18:39PM +0800, Chao Gao <chao.gao@intel.com> wrote:
> On Thu, Mar 21, 2024 at 02:24:12PM -0700, Isaku Yamahata wrote:
> > On Thu, Mar 21, 2024 at 12:11:11AM +0000,
> > "Edgecombe, Rick P" <rick.p.edgecombe@intel.com> wrote:
> > > [...]
> >
> > After looking at mmu_internal.h, I think the following four
> > functions could be open coded: kvm_mmu_private_spt(),
> > kvm_mmu_init_private_spt(), kvm_mmu_alloc_private_spt(), and
> > kvm_mmu_free_private_spt().
>
> It took me a few minutes to figure out why the mirror root page
> doesn't need a private_spt.
>
> Per the TDX module spec:
>
>   Secure EPT’s root page (EPML4 or EPML5, depending on whether the
>   host VMM uses 4-level or 5-level EPT) does not need to be explicitly
>   added. It is created during TD initialization (TDH.MNG.INIT) and is
>   stored as part of TDCS.
>
> I suggest adding the above as a comment somewhere even if we decide to
> open-code kvm_mmu_alloc_private_spt().

058/130 has such a comment. The citation from the spec would be better.

> IMO, some TDX details bleed into KVM MMU regardless of whether we
> open-code kvm_mmu_alloc_private_spt() or not. This isn't good, though
> I cannot think of a better solution.
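A sketch of the comment Chao suggests, built from the spec text above;
the placement (for instance next to the root allocation in
kvm_tdp_mmu_get_vcpu_root_hpa()) is left open in the thread:

/*
 * Per the TDX module spec, Secure EPT's root page (EPML4 or EPML5,
 * depending on whether the host VMM uses 4-level or 5-level EPT) does
 * not need to be explicitly added: it is created during TD
 * initialization (TDH.MNG.INIT) and is stored as part of the TDCS.
 * Hence KVM never allocates a private_spt for the root page table.
 */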
On Wed, 2024-03-20 at 17:11 -0700, Rick Edgecombe wrote:
> @@ -1378,6 +1375,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  		 * needs to be split.
>  		 */
>  		sp = tdp_mmu_alloc_sp(vcpu);
> +		if (!(raw_gfn & kvm_gfn_shared_mask(kvm)))
> +			kvm_mmu_alloc_private_spt(vcpu, sp);

This will try to allocate the private SP for normal VMs (which have a
zero shared mask); it should be:

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index efed70580922..585c80fb62c5 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1350,7 +1350,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		 * needs to be split.
 		 */
 		sp = tdp_mmu_alloc_sp(vcpu);
-		if (!(raw_gfn & kvm_gfn_shared_mask(kvm)))
+		if (kvm_is_private_gpa(kvm, raw_gfn << PAGE_SHIFT))
 			kvm_mmu_alloc_private_spt(vcpu, sp);
 		tdp_mmu_init_child_sp(sp, &iter);
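To spell out why the original check misfires: kvm_gfn_shared_mask()
returns 0 for a normal VM, so !(raw_gfn & 0) is true on every fault and
the private SP would always be allocated. A private-GPA predicate has
to additionally require that the VM has a shared mask at all.
kvm_is_private_gpa()'s definition is not shown in this thread, but a
minimal sketch consistent with its use here would be:

static inline bool kvm_is_private_gpa(const struct kvm *kvm, gpa_t gpa)
{
	gfn_t mask = kvm_gfn_shared_mask(kvm);

	/* Normal VMs have no shared mask and therefore no private GPAs. */
	return mask && !(gpa_to_gfn(gpa) & mask);
}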
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index fae559559a80..e1e40e3f5eb7 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -135,4 +135,16 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
 void tdp_iter_next(struct tdp_iter *iter);
 void tdp_iter_restart(struct tdp_iter *iter);
 
+static inline union kvm_mmu_page_role tdp_iter_child_role(struct tdp_iter *iter)
+{
+	union kvm_mmu_page_role child_role;
+	struct kvm_mmu_page *parent_sp;
+
+	parent_sp = sptep_to_sp(rcu_dereference(iter->sptep));
+
+	child_role = parent_sp->role;
+	child_role.level--;
+	return child_role;
+}
+
 #endif /* __KVM_X86_MMU_TDP_ITER_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 04c6af49c3e8..87233b3ceaef 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -177,24 +177,30 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
 		     kvm_mmu_page_as_id(_root) != _as_id) {	\
 		} else
 
-static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
+static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu,
+					     union kvm_mmu_page_role role)
 {
 	struct kvm_mmu_page *sp;
 
 	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
 	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
+	sp->role = role;
 
 	return sp;
 }
 
 static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
-			    gfn_t gfn, union kvm_mmu_page_role role)
+			    gfn_t gfn)
 {
 	INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
 
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 
-	sp->role = role;
+	/*
+	 * role must be set before calling this function.  At least role.level
+	 * is not 0 (PG_LEVEL_NONE).
+	 */
+	WARN_ON_ONCE(!sp->role.word);
 	sp->gfn = gfn;
 	sp->ptep = sptep;
 	sp->tdp_mmu_page = true;
@@ -202,20 +208,6 @@ static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
 	trace_kvm_mmu_get_page(sp, true);
 }
 
-static void tdp_mmu_init_child_sp(struct kvm_mmu_page *child_sp,
-				  struct tdp_iter *iter)
-{
-	struct kvm_mmu_page *parent_sp;
-	union kvm_mmu_page_role role;
-
-	parent_sp = sptep_to_sp(rcu_dereference(iter->sptep));
-
-	role = parent_sp->role;
-	role.level--;
-
-	tdp_mmu_init_sp(child_sp, iter->sptep, iter->gfn, role);
-}
-
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
 {
 	union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
@@ -234,8 +226,8 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-	root = tdp_mmu_alloc_sp(vcpu);
-	tdp_mmu_init_sp(root, NULL, 0, role);
+	root = tdp_mmu_alloc_sp(vcpu, role);
+	tdp_mmu_init_sp(root, NULL, 0);
 
 	/*
 	 * TDP MMU roots are kept until they are explicitly invalidated, either
@@ -1068,8 +1060,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		 * The SPTE is either non-present or points to a huge page that
 		 * needs to be split.
 		 */
-		sp = tdp_mmu_alloc_sp(vcpu);
-		tdp_mmu_init_child_sp(sp, &iter);
+		sp = tdp_mmu_alloc_sp(vcpu, tdp_iter_child_role(&iter));
+		tdp_mmu_init_sp(sp, iter.sptep, iter.gfn);
 
 		sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
 
@@ -1312,7 +1304,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
 	return spte_set;
 }
 
-static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
+static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp, union kvm_mmu_page_role role)
 {
 	struct kvm_mmu_page *sp;
 
@@ -1322,6 +1314,7 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
 	if (!sp)
 		return NULL;
 
+	sp->role = role;
 	sp->spt = (void *)__get_free_page(gfp);
 	if (!sp->spt) {
 		kmem_cache_free(mmu_page_header_cache, sp);
@@ -1335,6 +1328,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 						       struct tdp_iter *iter,
 						       bool shared)
 {
+	union kvm_mmu_page_role role = tdp_iter_child_role(iter);
 	struct kvm_mmu_page *sp;
 
 	kvm_lockdep_assert_mmu_lock_held(kvm, shared);
@@ -1348,7 +1342,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 	 * If this allocation fails we drop the lock and retry with reclaim
 	 * allowed.
 	 */
-	sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT);
+	sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT, role);
 	if (sp)
 		return sp;
 
@@ -1360,7 +1354,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 	write_unlock(&kvm->mmu_lock);
 
 	iter->yielded = true;
-	sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT);
+	sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT, role);
 
 	if (shared)
 		read_lock(&kvm->mmu_lock);
@@ -1455,7 +1449,7 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
 			continue;
 		}
 
-		tdp_mmu_init_child_sp(sp, &iter);
+		tdp_mmu_init_sp(sp, iter.sptep, iter.gfn);
 
 		if (tdp_mmu_split_huge_page(kvm, &iter, sp, shared))
 			goto retry;
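The payoff of this restructuring is that sp->role is already valid
inside the allocator, so the private-SPT hook that Rick's proposal
removes can key off the role at a single point. Combining the hunks
above with the hunk removed in the first diff of this thread,
tdp_mmu_alloc_sp() would read roughly as follows (a sketch assembled
from the quoted hunks, not a hunk from the series):

static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu,
					     union kvm_mmu_page_role role)
{
	struct kvm_mmu_page *sp;

	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
	sp->role = role;

	/* Secure-EPT backing pages are needed only for private tables. */
	if (kvm_mmu_page_role_is_private(role))
		kvm_mmu_alloc_private_spt(vcpu, sp);

	return sp;
}

The alternative, proposed earlier in the thread, is to keep
tdp_mmu_alloc_sp() role-free and open code the private allocation at
the kvm_tdp_mmu_map() call site.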