Message ID | 20151112205245.6bd773737cfa78422dac5a79@lab.ntt.co.jp (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Nov 12, 2015 at 08:52:45PM +0900, Takuya Yoshikawa wrote: > kvm_mmu_mark_parents_unsync() alone uses pte_list_walk(), witch does > nearly the same as the for_each_rmap_spte macro. The only difference > is that is_shadow_present_pte() checks cannot be placed there because > kvm_mmu_mark_parents_unsync() can be called with a new parent pointer > whose entry is not set yet. > > By calling mark_unsync() separately for the parent and adding the parent > pointer to the parent_ptes chain later in kvm_mmu_get_page(), the macro > works with no problem. > > Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp> > --- > arch/x86/kvm/mmu.c | 36 +++++++++++++----------------------- > 1 file changed, 13 insertions(+), 23 deletions(-) > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index e8cfdc4..1691171 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -1007,26 +1007,6 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list) > } > } > > -typedef void (*pte_list_walk_fn) (u64 *spte); > -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) > -{ > - struct pte_list_desc *desc; > - int i; > - > - if (!*pte_list) > - return; > - > - if (!(*pte_list & 1)) > - return fn((u64 *)*pte_list); > - > - desc = (struct pte_list_desc *)(*pte_list & ~1ul); > - while (desc) { > - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) > - fn(desc->sptes[i]); > - desc = desc->more; > - } > -} > - > static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, > struct kvm_memory_slot *slot) > { > @@ -1741,7 +1721,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, > static void mark_unsync(u64 *spte); > static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) > { > - pte_list_walk(&sp->parent_ptes, mark_unsync); > + u64 *sptep; > + struct rmap_iterator iter; > + > + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { > + mark_unsync(sptep); > + } > } > > static void mark_unsync(u64 *spte) > @@ -2111,12 +2096,17 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, Faulting a spte, and one of the levels of sptes, either spte-1 spte-2 spte-3 has present bit clear. So we're searching for a guest page to shadow, with gfn "gfn". > if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) > break; If a shadow for gfn exists, but is unsync, sync guest-page ---to--> kvm sptes. > - mmu_page_add_parent_pte(vcpu, sp, parent_pte); add "gfn" (actually its "struct kvm_mmu_page *sp" pointer) to the parent. > if (sp->unsync_children) { > kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); > kvm_mmu_mark_parents_unsync(sp); kvm_mmu_mark_parents_unsync relied on the links from current level all the way to top level to mark all levels unsync, so that on guest entry, KVM_REQ_MMU_SYNC is processed and any level is brought from guest --> kvm pages. This now fails, because you removed "mmu_page_add_parent_pte" (the link is not formed all the way to root). Unless i am missing something, this is not correct. > - } else if (sp->unsync) > + if (parent_pte) > + mark_unsync(parent_pte); > + } else if (sp->unsync) { > kvm_mmu_mark_parents_unsync(sp); > + if (parent_pte) > + mark_unsync(parent_pte); > + } > + mmu_page_add_parent_pte(vcpu, sp, parent_pte); > > __clear_sp_write_flooding_count(sp); > trace_kvm_mmu_get_page(sp, false); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Nov 13, 2015 at 07:47:28PM -0200, Marcelo Tosatti wrote: > On Thu, Nov 12, 2015 at 08:52:45PM +0900, Takuya Yoshikawa wrote: > > kvm_mmu_mark_parents_unsync() alone uses pte_list_walk(), witch does > > nearly the same as the for_each_rmap_spte macro. The only difference > > is that is_shadow_present_pte() checks cannot be placed there because > > kvm_mmu_mark_parents_unsync() can be called with a new parent pointer > > whose entry is not set yet. > > > > By calling mark_unsync() separately for the parent and adding the parent > > pointer to the parent_ptes chain later in kvm_mmu_get_page(), the macro > > works with no problem. > > > > Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp> > > --- > > arch/x86/kvm/mmu.c | 36 +++++++++++++----------------------- > > 1 file changed, 13 insertions(+), 23 deletions(-) > > > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > > index e8cfdc4..1691171 100644 > > --- a/arch/x86/kvm/mmu.c > > +++ b/arch/x86/kvm/mmu.c > > @@ -1007,26 +1007,6 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list) > > } > > } > > > > -typedef void (*pte_list_walk_fn) (u64 *spte); > > -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) > > -{ > > - struct pte_list_desc *desc; > > - int i; > > - > > - if (!*pte_list) > > - return; > > - > > - if (!(*pte_list & 1)) > > - return fn((u64 *)*pte_list); > > - > > - desc = (struct pte_list_desc *)(*pte_list & ~1ul); > > - while (desc) { > > - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) > > - fn(desc->sptes[i]); > > - desc = desc->more; > > - } > > -} > > - > > static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, > > struct kvm_memory_slot *slot) > > { > > @@ -1741,7 +1721,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, > > static void mark_unsync(u64 *spte); > > static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) > > { > > - pte_list_walk(&sp->parent_ptes, mark_unsync); > > + u64 *sptep; > > + struct rmap_iterator iter; > > + > > + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { > > + mark_unsync(sptep); > > + } > > } > > > > static void mark_unsync(u64 *spte) > > @@ -2111,12 +2096,17 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, > > Faulting a spte, and one of the levels of sptes, either > > > spte-1 > spte-2 > spte-3 > > has present bit clear. So we're searching for a guest page to shadow, with > gfn "gfn". > > > if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) > > break; > > If a shadow for gfn exists, but is unsync, sync guest-page ---to--> kvm > sptes. > > > - mmu_page_add_parent_pte(vcpu, sp, parent_pte); > > add "gfn" (actually its "struct kvm_mmu_page *sp" pointer) to > the parent. > > if (sp->unsync_children) { > > kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); > > kvm_mmu_mark_parents_unsync(sp); > > kvm_mmu_mark_parents_unsync relied on the links from current level all > the way to top level to mark all levels unsync, so that on guest entry, > KVM_REQ_MMU_SYNC is processed and any level is brought from guest --> > kvm pages. This now fails, because you removed "mmu_page_add_parent_pte" > (the link is not formed all the way to root). > > Unless i am missing something, this is not correct. The actual issue is this: a higher level page that had, under its children, no out of sync pages, now, due to your addition, a child that is unsync: initial state: level1 final state: level1 -x-> level2 -x-> level3 Where -x-> are the links created by this pagefault fixing round. If _any_ page under you is unsync (not necessarily the ones this pagefault is accessing), you have to mark parents unsync. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/11/14 18:20, Marcelo Tosatti wrote: > The actual issue is this: a higher level page that had, under its children, > no out of sync pages, now, due to your addition, a child that is unsync: > > initial state: > level1 > > final state: > > level1 -x-> level2 -x-> level3 > > Where -x-> are the links created by this pagefault fixing round. > > If _any_ page under you is unsync (not necessarily the ones this > pagefault is accessing), you have to mark parents unsync. I understand this, but I don't think my patch will break this. What kvm_mmu_mark_parents_unsync() does is: for each p_i in sp->parent_ptes rmap chain mark_unsync(p_i); Then, mark_unsync() finds the parent sp including that p_i to set ->unsync_child_bitmap and increment ->unsync_children if necessary. It may also call kvm_mmu_mark_parents_unsync() recursively. I understand we need to tell the parents "you have an unsync child/descendant" until this information reaches the top level by that recursive calls. But since these recursive calls cannot come back to the starting sp, the child->parent graph has no loop, each mark_unsync(p_i) will not be affected by other parents in that sp->parent_ptes rmap chain, from which we started the recursive calls. As the following code shows, my patch does mark_unsync(parent_pte) separately, and then mmu_page_add_parent_pte(vcpu, sp, parent_pte): > - } else if (sp->unsync) > + if (parent_pte) > + mark_unsync(parent_pte); > + } else if (sp->unsync) { > kvm_mmu_mark_parents_unsync(sp); > + if (parent_pte) > + mark_unsync(parent_pte); > + } > + mmu_page_add_parent_pte(vcpu, sp, parent_pte); So, as you worried, during each mark_unsync(p_i) is processed, this parent_pte does not exist in that sp->parent_ptes rmap chain. But as I explained above, this does not change anything about what each mark_unsync(p_i) call does, so keeps the original behaviour. By the way, I think "kvm_mmu_mark_parents_unsync" and "mark_unsync" do not tell what they actually do well. When I first saw the names, I thought they would just set the parents' sp->unsync. To reflect the following meaning better, it should be propagate_unsync(_to_parents) or something: Tell the parents "you have an unsync child/descendant" until this unsync information reaches the top level Thanks, Takuya -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 16/11/2015 03:51, Takuya Yoshikawa wrote: > What kvm_mmu_mark_parents_unsync() does is: > > for each p_i in sp->parent_ptes rmap chain > mark_unsync(p_i); > > Then, mark_unsync() finds the parent sp including that p_i to > set ->unsync_child_bitmap and increment ->unsync_children if > necessary. It may also call kvm_mmu_mark_parents_unsync() > recursively. I agree. sp->parent_ptes goes up one level from sp; kvm_mmu_mark_parents_unsync(sp) visits the level above sp, while mark_unsync(sp) visit sp and all the levels above it. Calling mark_unsync(parent_pte) is enough to complete the visit, after kvm_mmu_mark_parents_unsync has already processed the level above sp. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 11/12/2015 07:52 PM, Takuya Yoshikawa wrote: > kvm_mmu_mark_parents_unsync() alone uses pte_list_walk(), witch does > nearly the same as the for_each_rmap_spte macro. The only difference > is that is_shadow_present_pte() checks cannot be placed there because > kvm_mmu_mark_parents_unsync() can be called with a new parent pointer > whose entry is not set yet. > > By calling mark_unsync() separately for the parent and adding the parent > pointer to the parent_ptes chain later in kvm_mmu_get_page(), the macro > works with no problem. > > Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp> > --- > arch/x86/kvm/mmu.c | 36 +++++++++++++----------------------- > 1 file changed, 13 insertions(+), 23 deletions(-) > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index e8cfdc4..1691171 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -1007,26 +1007,6 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list) > } > } > > -typedef void (*pte_list_walk_fn) (u64 *spte); > -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) > -{ > - struct pte_list_desc *desc; > - int i; > - > - if (!*pte_list) > - return; > - > - if (!(*pte_list & 1)) > - return fn((u64 *)*pte_list); > - > - desc = (struct pte_list_desc *)(*pte_list & ~1ul); > - while (desc) { > - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) > - fn(desc->sptes[i]); > - desc = desc->more; > - } > -} > - > static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, > struct kvm_memory_slot *slot) > { > @@ -1741,7 +1721,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, > static void mark_unsync(u64 *spte); > static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) > { > - pte_list_walk(&sp->parent_ptes, mark_unsync); > + u64 *sptep; > + struct rmap_iterator iter; > + > + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { > + mark_unsync(sptep); > + } > } > > static void mark_unsync(u64 *spte) > @@ -2111,12 +2096,17 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, > if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) > break; > > - mmu_page_add_parent_pte(vcpu, sp, parent_pte); > if (sp->unsync_children) { > kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); > kvm_mmu_mark_parents_unsync(sp); After your change above, the @sp's parents have not been changed, no need to call it now. > - } else if (sp->unsync) > + if (parent_pte) > + mark_unsync(parent_pte); > + } else if (sp->unsync) { > kvm_mmu_mark_parents_unsync(sp); Ditto. > + if (parent_pte) > + mark_unsync(parent_pte); > + } > + mmu_page_add_parent_pte(vcpu, sp, parent_pte); > > __clear_sp_write_flooding_count(sp); > trace_kvm_mmu_get_page(sp, false); > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e8cfdc4..1691171 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1007,26 +1007,6 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list) } } -typedef void (*pte_list_walk_fn) (u64 *spte); -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) -{ - struct pte_list_desc *desc; - int i; - - if (!*pte_list) - return; - - if (!(*pte_list & 1)) - return fn((u64 *)*pte_list); - - desc = (struct pte_list_desc *)(*pte_list & ~1ul); - while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) - fn(desc->sptes[i]); - desc = desc->more; - } -} - static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, struct kvm_memory_slot *slot) { @@ -1741,7 +1721,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, static void mark_unsync(u64 *spte); static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { - pte_list_walk(&sp->parent_ptes, mark_unsync); + u64 *sptep; + struct rmap_iterator iter; + + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { + mark_unsync(sptep); + } } static void mark_unsync(u64 *spte) @@ -2111,12 +2096,17 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) break; - mmu_page_add_parent_pte(vcpu, sp, parent_pte); if (sp->unsync_children) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); kvm_mmu_mark_parents_unsync(sp); - } else if (sp->unsync) + if (parent_pte) + mark_unsync(parent_pte); + } else if (sp->unsync) { kvm_mmu_mark_parents_unsync(sp); + if (parent_pte) + mark_unsync(parent_pte); + } + mmu_page_add_parent_pte(vcpu, sp, parent_pte); __clear_sp_write_flooding_count(sp); trace_kvm_mmu_get_page(sp, false);
kvm_mmu_mark_parents_unsync() alone uses pte_list_walk(), witch does nearly the same as the for_each_rmap_spte macro. The only difference is that is_shadow_present_pte() checks cannot be placed there because kvm_mmu_mark_parents_unsync() can be called with a new parent pointer whose entry is not set yet. By calling mark_unsync() separately for the parent and adding the parent pointer to the parent_ptes chain later in kvm_mmu_get_page(), the macro works with no problem. Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp> --- arch/x86/kvm/mmu.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-)