Message ID | 1448907973-36066-9-git-send-email-guangrong.xiao@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 12/01/2015 02:26 AM, Xiao Guangrong wrote: > non-leaf shadow pages are always write protected, it can be the user > of page track > > Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com> > --- > arch/x86/include/asm/kvm_page_track.h | 8 +++++ > arch/x86/kvm/mmu.c | 26 +++++++++++++--- > arch/x86/kvm/page_track.c | 58 +++++++++++++++++++++++------------ > 3 files changed, 67 insertions(+), 25 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h > index 6744234..3447dac 100644 > --- a/arch/x86/include/asm/kvm_page_track.h > +++ b/arch/x86/include/asm/kvm_page_track.h > @@ -41,8 +41,16 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, > void kvm_page_track_free_memslot(struct kvm_memory_slot *free, > struct kvm_memory_slot *dont); > > +void > +kvm_slot_page_track_add_page_nolock(struct kvm *kvm, > + struct kvm_memory_slot *slot, gfn_t gfn, > + enum kvm_page_track_mode mode); > void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn, > enum kvm_page_track_mode mode); > +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm, > + struct kvm_memory_slot *slot, > + gfn_t gfn, > + enum kvm_page_track_mode mode); > void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn, > enum kvm_page_track_mode mode); > bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn, > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index b23f9fc..5a2ca73 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -806,11 +806,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) > struct kvm_memory_slot *slot; > gfn_t gfn; > > + kvm->arch.indirect_shadow_pages++; > gfn = sp->gfn; > slots = kvm_memslots_for_spte_role(kvm, sp->role); > slot = __gfn_to_memslot(slots, gfn); > + > + /* the non-leaf shadow pages are keeping readonly. */ > + if (sp->role.level > PT_PAGE_TABLE_LEVEL) > + return kvm_slot_page_track_add_page_nolock(kvm, slot, gfn, > + KVM_PAGE_TRACK_WRITE); > + > kvm_mmu_gfn_disallow_lpage(slot, gfn); > - kvm->arch.indirect_shadow_pages++; > } > > static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) > @@ -819,11 +825,15 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) > struct kvm_memory_slot *slot; > gfn_t gfn; > > + kvm->arch.indirect_shadow_pages--; > gfn = sp->gfn; > slots = kvm_memslots_for_spte_role(kvm, sp->role); > slot = __gfn_to_memslot(slots, gfn); > + if (sp->role.level > PT_PAGE_TABLE_LEVEL) > + return kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, > + KVM_PAGE_TRACK_WRITE); > + > kvm_mmu_gfn_allow_lpage(slot, gfn); > - kvm->arch.indirect_shadow_pages--; > } > > static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, > @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, > hlist_add_head(&sp->hash_link, > &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); > if (!direct) { > - if (rmap_write_protect(vcpu, gfn)) > + /* > + * we should do write protection before syncing pages > + * otherwise the content of the synced shadow page may > + * be inconsistent with guest page table. > + */ > + account_shadowed(vcpu->kvm, sp); > + > + if (level == PT_PAGE_TABLE_LEVEL && > + rmap_write_protect(vcpu, gfn)) > kvm_flush_remote_tlbs(vcpu->kvm); I think your modification is good but I am little bit confused here. In account_shadowed, if sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write protected, and this is reasonable. So why write protecting the gfn of PT_PAGE_TABLE_LEVEL here? > if (level > PT_PAGE_TABLE_LEVEL && need_sync) > kvm_sync_pages(vcpu, gfn); > - > - account_shadowed(vcpu->kvm, sp); > } > sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; > init_shadow_page_table(sp); > diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c > index 84420df..87554d3 100644 > --- a/arch/x86/kvm/page_track.c > +++ b/arch/x86/kvm/page_track.c > @@ -77,6 +77,26 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, > WARN_ON(val < 0); > } > > +void > +kvm_slot_page_track_add_page_nolock(struct kvm *kvm, > + struct kvm_memory_slot *slot, gfn_t gfn, > + enum kvm_page_track_mode mode) > +{ > + WARN_ON(!check_mode(mode)); > + > + update_gfn_track(slot, gfn, mode, 1); > + > + /* > + * new track stops large page mapping for the > + * tracked page. > + */ > + kvm_mmu_gfn_disallow_lpage(slot, gfn); > + > + if (mode == KVM_PAGE_TRACK_WRITE) > + if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) > + kvm_flush_remote_tlbs(kvm); > +} > + > /* > * add guest page to the tracking pool so that corresponding access on that > * page will be intercepted. > @@ -101,21 +121,27 @@ void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn, > slot = __gfn_to_memslot(slots, gfn); > > spin_lock(&kvm->mmu_lock); > - update_gfn_track(slot, gfn, mode, 1); > - > - /* > - * new track stops large page mapping for the > - * tracked page. > - */ > - kvm_mmu_gfn_disallow_lpage(slot, gfn); > - > - if (mode == KVM_PAGE_TRACK_WRITE) > - if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) > - kvm_flush_remote_tlbs(kvm); > + kvm_slot_page_track_add_page_nolock(kvm, slot, gfn, mode); > spin_unlock(&kvm->mmu_lock); > } > } > > +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm, > + struct kvm_memory_slot *slot, > + gfn_t gfn, > + enum kvm_page_track_mode mode) > +{ > + WARN_ON(!check_mode(mode)); > + > + update_gfn_track(slot, gfn, mode, -1); > + > + /* > + * allow large page mapping for the tracked page > + * after the tracker is gone. > + */ > + kvm_mmu_gfn_allow_lpage(slot, gfn); > +} > + > /* > * remove the guest page from the tracking pool which stops the interception > * of corresponding access on that page. It is the opposed operation of > @@ -134,20 +160,12 @@ void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn, > struct kvm_memory_slot *slot; > int i; > > - WARN_ON(!check_mode(mode)); > - > for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { > slots = __kvm_memslots(kvm, i); > slot = __gfn_to_memslot(slots, gfn); > > spin_lock(&kvm->mmu_lock); > - update_gfn_track(slot, gfn, mode, -1); > - > - /* > - * allow large page mapping for the tracked page > - * after the tracker is gone. > - */ > - kvm_mmu_gfn_allow_lpage(slot, gfn); > + kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode); Looks you need to merge this part with patch 1, as you are modifying kvm_page_track_{add,remove}_page here, which are introduced in your patch 1. Thanks, -Kai > spin_unlock(&kvm->mmu_lock); > } > } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/15/2015 03:52 PM, Kai Huang wrote: > > > On 12/01/2015 02:26 AM, Xiao Guangrong wrote: >> non-leaf shadow pages are always write protected, it can be the user >> of page track >> >> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com> >> --- >> arch/x86/include/asm/kvm_page_track.h | 8 +++++ >> arch/x86/kvm/mmu.c | 26 +++++++++++++--- >> arch/x86/kvm/page_track.c | 58 >> +++++++++++++++++++++++------------ >> 3 files changed, 67 insertions(+), 25 deletions(-) >> >> diff --git a/arch/x86/include/asm/kvm_page_track.h >> b/arch/x86/include/asm/kvm_page_track.h >> index 6744234..3447dac 100644 >> --- a/arch/x86/include/asm/kvm_page_track.h >> +++ b/arch/x86/include/asm/kvm_page_track.h >> @@ -41,8 +41,16 @@ int kvm_page_track_create_memslot(struct >> kvm_memory_slot *slot, >> void kvm_page_track_free_memslot(struct kvm_memory_slot *free, >> struct kvm_memory_slot *dont); >> +void >> +kvm_slot_page_track_add_page_nolock(struct kvm *kvm, >> + struct kvm_memory_slot *slot, gfn_t gfn, >> + enum kvm_page_track_mode mode); >> void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn, >> enum kvm_page_track_mode mode); >> +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm, >> + struct kvm_memory_slot *slot, >> + gfn_t gfn, >> + enum kvm_page_track_mode mode); >> void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn, >> enum kvm_page_track_mode mode); >> bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn, >> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c >> index b23f9fc..5a2ca73 100644 >> --- a/arch/x86/kvm/mmu.c >> +++ b/arch/x86/kvm/mmu.c >> @@ -806,11 +806,17 @@ static void account_shadowed(struct kvm *kvm, >> struct kvm_mmu_page *sp) >> struct kvm_memory_slot *slot; >> gfn_t gfn; >> + kvm->arch.indirect_shadow_pages++; >> gfn = sp->gfn; >> slots = kvm_memslots_for_spte_role(kvm, sp->role); >> slot = __gfn_to_memslot(slots, gfn); >> + >> + /* the non-leaf shadow pages are keeping readonly. */ >> + if (sp->role.level > PT_PAGE_TABLE_LEVEL) >> + return kvm_slot_page_track_add_page_nolock(kvm, slot, gfn, >> + KVM_PAGE_TRACK_WRITE); >> + >> kvm_mmu_gfn_disallow_lpage(slot, gfn); >> - kvm->arch.indirect_shadow_pages++; >> } >> static void unaccount_shadowed(struct kvm *kvm, struct >> kvm_mmu_page *sp) >> @@ -819,11 +825,15 @@ static void unaccount_shadowed(struct kvm *kvm, >> struct kvm_mmu_page *sp) >> struct kvm_memory_slot *slot; >> gfn_t gfn; >> + kvm->arch.indirect_shadow_pages--; >> gfn = sp->gfn; >> slots = kvm_memslots_for_spte_role(kvm, sp->role); >> slot = __gfn_to_memslot(slots, gfn); >> + if (sp->role.level > PT_PAGE_TABLE_LEVEL) >> + return kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, >> + KVM_PAGE_TRACK_WRITE); >> + >> kvm_mmu_gfn_allow_lpage(slot, gfn); >> - kvm->arch.indirect_shadow_pages--; >> } >> static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, >> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page >> *kvm_mmu_get_page(struct kvm_vcpu *vcpu, >> hlist_add_head(&sp->hash_link, >> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); >> if (!direct) { >> - if (rmap_write_protect(vcpu, gfn)) >> + /* >> + * we should do write protection before syncing pages >> + * otherwise the content of the synced shadow page may >> + * be inconsistent with guest page table. >> + */ >> + account_shadowed(vcpu->kvm, sp); >> + >> + if (level == PT_PAGE_TABLE_LEVEL && >> + rmap_write_protect(vcpu, gfn)) >> kvm_flush_remote_tlbs(vcpu->kvm); > I think your modification is good but I am little bit confused here. > In account_shadowed, if sp->role.level > PT_PAGE_TABLE_LEVEL, the > sp->gfn is write protected, and this is reasonable. So why write > protecting the gfn of PT_PAGE_TABLE_LEVEL here? > >> if (level > PT_PAGE_TABLE_LEVEL && need_sync) >> kvm_sync_pages(vcpu, gfn); >> - >> - account_shadowed(vcpu->kvm, sp); >> } >> sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; >> init_shadow_page_table(sp); >> diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c >> index 84420df..87554d3 100644 >> --- a/arch/x86/kvm/page_track.c >> +++ b/arch/x86/kvm/page_track.c >> @@ -77,6 +77,26 @@ static void update_gfn_track(struct >> kvm_memory_slot *slot, gfn_t gfn, >> WARN_ON(val < 0); >> } >> +void >> +kvm_slot_page_track_add_page_nolock(struct kvm *kvm, >> + struct kvm_memory_slot *slot, gfn_t gfn, >> + enum kvm_page_track_mode mode) >> +{ >> + WARN_ON(!check_mode(mode)); >> + >> + update_gfn_track(slot, gfn, mode, 1); >> + >> + /* >> + * new track stops large page mapping for the >> + * tracked page. >> + */ >> + kvm_mmu_gfn_disallow_lpage(slot, gfn); >> + >> + if (mode == KVM_PAGE_TRACK_WRITE) >> + if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) >> + kvm_flush_remote_tlbs(kvm); >> +} >> + >> /* >> * add guest page to the tracking pool so that corresponding access >> on that >> * page will be intercepted. >> @@ -101,21 +121,27 @@ void kvm_page_track_add_page(struct kvm *kvm, >> gfn_t gfn, >> slot = __gfn_to_memslot(slots, gfn); >> spin_lock(&kvm->mmu_lock); >> - update_gfn_track(slot, gfn, mode, 1); >> - >> - /* >> - * new track stops large page mapping for the >> - * tracked page. >> - */ >> - kvm_mmu_gfn_disallow_lpage(slot, gfn); >> - >> - if (mode == KVM_PAGE_TRACK_WRITE) >> - if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) >> - kvm_flush_remote_tlbs(kvm); >> + kvm_slot_page_track_add_page_nolock(kvm, slot, gfn, mode); >> spin_unlock(&kvm->mmu_lock); >> } >> } >> +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm, >> + struct kvm_memory_slot *slot, >> + gfn_t gfn, >> + enum kvm_page_track_mode mode) >> +{ >> + WARN_ON(!check_mode(mode)); >> + >> + update_gfn_track(slot, gfn, mode, -1); >> + >> + /* >> + * allow large page mapping for the tracked page >> + * after the tracker is gone. >> + */ >> + kvm_mmu_gfn_allow_lpage(slot, gfn); >> +} >> + >> /* >> * remove the guest page from the tracking pool which stops the >> interception >> * of corresponding access on that page. It is the opposed >> operation of >> @@ -134,20 +160,12 @@ void kvm_page_track_remove_page(struct kvm >> *kvm, gfn_t gfn, >> struct kvm_memory_slot *slot; >> int i; >> - WARN_ON(!check_mode(mode)); >> - >> for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { >> slots = __kvm_memslots(kvm, i); >> slot = __gfn_to_memslot(slots, gfn); >> spin_lock(&kvm->mmu_lock); >> - update_gfn_track(slot, gfn, mode, -1); >> - >> - /* >> - * allow large page mapping for the tracked page >> - * after the tracker is gone. >> - */ >> - kvm_mmu_gfn_allow_lpage(slot, gfn); >> + kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode); > Looks you need to merge this part with patch 1, as you are modifying > kvm_page_track_{add,remove}_page here, which are introduced in your > patch 1. Should be patch 5. sorry again. Thanks, -Kai > > Thanks, > -Kai >> spin_unlock(&kvm->mmu_lock); >> } >> } > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/15/2015 03:52 PM, Kai Huang wrote: >> static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, >> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, >> hlist_add_head(&sp->hash_link, >> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); >> if (!direct) { >> - if (rmap_write_protect(vcpu, gfn)) >> + /* >> + * we should do write protection before syncing pages >> + * otherwise the content of the synced shadow page may >> + * be inconsistent with guest page table. >> + */ >> + account_shadowed(vcpu->kvm, sp); >> + >> + if (level == PT_PAGE_TABLE_LEVEL && >> + rmap_write_protect(vcpu, gfn)) >> kvm_flush_remote_tlbs(vcpu->kvm); > I think your modification is good but I am little bit confused here. In account_shadowed, if > sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write protected, and this is reasonable. So why > write protecting the gfn of PT_PAGE_TABLE_LEVEL here? Because the shadow page will become 'sync' that means the shadow page will be synced with the page table in guest. So the shadow page need to be write-protected to avoid the guest page table is changed when we do the 'sync' thing. The shadow page need to be write-protected to avoid that guest page table is changed when we are syncing the shadow page table. See kvm_sync_pages() after doing rmap_write_protect(). >> /* >> * remove the guest page from the tracking pool which stops the interception >> * of corresponding access on that page. It is the opposed operation of >> @@ -134,20 +160,12 @@ void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn, >> struct kvm_memory_slot *slot; >> int i; >> - WARN_ON(!check_mode(mode)); >> - >> for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { >> slots = __kvm_memslots(kvm, i); >> slot = __gfn_to_memslot(slots, gfn); >> spin_lock(&kvm->mmu_lock); >> - update_gfn_track(slot, gfn, mode, -1); >> - >> - /* >> - * allow large page mapping for the tracked page >> - * after the tracker is gone. >> - */ >> - kvm_mmu_gfn_allow_lpage(slot, gfn); >> + kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode); > Looks you need to merge this part with patch 1, as you are modifying > kvm_page_track_{add,remove}_page here, which are introduced in your patch 1. Indeed, it is better. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/15/2015 05:10 PM, Xiao Guangrong wrote: > > > On 12/15/2015 03:52 PM, Kai Huang wrote: > >>> static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, >>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page >>> *kvm_mmu_get_page(struct kvm_vcpu *vcpu, >>> hlist_add_head(&sp->hash_link, >>> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); >>> if (!direct) { >>> - if (rmap_write_protect(vcpu, gfn)) >>> + /* >>> + * we should do write protection before syncing pages >>> + * otherwise the content of the synced shadow page may >>> + * be inconsistent with guest page table. >>> + */ >>> + account_shadowed(vcpu->kvm, sp); >>> + >>> + if (level == PT_PAGE_TABLE_LEVEL && >>> + rmap_write_protect(vcpu, gfn)) >>> kvm_flush_remote_tlbs(vcpu->kvm); >> I think your modification is good but I am little bit confused here. >> In account_shadowed, if >> sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write protected, >> and this is reasonable. So why >> write protecting the gfn of PT_PAGE_TABLE_LEVEL here? > > Because the shadow page will become 'sync' that means the shadow page > will be synced > with the page table in guest. So the shadow page need to be > write-protected to avoid > the guest page table is changed when we do the 'sync' thing. > > The shadow page need to be write-protected to avoid that guest page > table is changed > when we are syncing the shadow page table. See kvm_sync_pages() after > doing > rmap_write_protect(). I see. So why are you treat PT_PAGE_TABLE_LEVEL gfn separately here? why this cannot be done in account_shadowed, as you did for upper level sp? Actually I am thinking whether account_shadowed is overdoing things. From it's name it should only *account* shadow sp, but now it also does write protection and disable large page mapping. Thanks, -Kai > >>> /* >>> * remove the guest page from the tracking pool which stops the >>> interception >>> * of corresponding access on that page. It is the opposed >>> operation of >>> @@ -134,20 +160,12 @@ void kvm_page_track_remove_page(struct kvm >>> *kvm, gfn_t gfn, >>> struct kvm_memory_slot *slot; >>> int i; >>> - WARN_ON(!check_mode(mode)); >>> - >>> for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { >>> slots = __kvm_memslots(kvm, i); >>> slot = __gfn_to_memslot(slots, gfn); >>> spin_lock(&kvm->mmu_lock); >>> - update_gfn_track(slot, gfn, mode, -1); >>> - >>> - /* >>> - * allow large page mapping for the tracked page >>> - * after the tracker is gone. >>> - */ >>> - kvm_mmu_gfn_allow_lpage(slot, gfn); >>> + kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode); >> Looks you need to merge this part with patch 1, as you are modifying >> kvm_page_track_{add,remove}_page here, which are introduced in your >> patch 1. > > Indeed, it is better. > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/16/2015 03:51 PM, Kai Huang wrote: > > > On 12/15/2015 05:10 PM, Xiao Guangrong wrote: >> >> >> On 12/15/2015 03:52 PM, Kai Huang wrote: >> >>>> static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, >>>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, >>>> hlist_add_head(&sp->hash_link, >>>> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); >>>> if (!direct) { >>>> - if (rmap_write_protect(vcpu, gfn)) >>>> + /* >>>> + * we should do write protection before syncing pages >>>> + * otherwise the content of the synced shadow page may >>>> + * be inconsistent with guest page table. >>>> + */ >>>> + account_shadowed(vcpu->kvm, sp); >>>> + >>>> + if (level == PT_PAGE_TABLE_LEVEL && >>>> + rmap_write_protect(vcpu, gfn)) >>>> kvm_flush_remote_tlbs(vcpu->kvm); >>> I think your modification is good but I am little bit confused here. In account_shadowed, if >>> sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write protected, and this is reasonable. So why >>> write protecting the gfn of PT_PAGE_TABLE_LEVEL here? >> >> Because the shadow page will become 'sync' that means the shadow page will be synced >> with the page table in guest. So the shadow page need to be write-protected to avoid >> the guest page table is changed when we do the 'sync' thing. >> >> The shadow page need to be write-protected to avoid that guest page table is changed >> when we are syncing the shadow page table. See kvm_sync_pages() after doing >> rmap_write_protect(). > I see. So why are you treat PT_PAGE_TABLE_LEVEL gfn separately here? why this cannot be done in > account_shadowed, as you did for upper level sp? non-leaf shadow pages are keepking write-protected which page fault handler can not fix write access on it. And leaf shadow pages are not. > Actually I am thinking whether account_shadowed is > overdoing things. From it's name it should only *account* shadow sp, but now it also does write > protection and disable large page mapping. > Hmm.. disable large page mapping is already in current code... i think account_shadowed() can be understood as new page is taken into account, so protection things are needed there. But I am not good at naming function and also my english is not good enough, any other better name is welcome. ;) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/16/2015 04:39 PM, Xiao Guangrong wrote: > > > On 12/16/2015 03:51 PM, Kai Huang wrote: >> >> >> On 12/15/2015 05:10 PM, Xiao Guangrong wrote: >>> >>> >>> On 12/15/2015 03:52 PM, Kai Huang wrote: >>> >>>>> static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, >>>>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page >>>>> *kvm_mmu_get_page(struct kvm_vcpu *vcpu, >>>>> hlist_add_head(&sp->hash_link, >>>>> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); >>>>> if (!direct) { >>>>> - if (rmap_write_protect(vcpu, gfn)) >>>>> + /* >>>>> + * we should do write protection before syncing pages >>>>> + * otherwise the content of the synced shadow page may >>>>> + * be inconsistent with guest page table. >>>>> + */ >>>>> + account_shadowed(vcpu->kvm, sp); >>>>> + >>>>> + if (level == PT_PAGE_TABLE_LEVEL && >>>>> + rmap_write_protect(vcpu, gfn)) >>>>> kvm_flush_remote_tlbs(vcpu->kvm); >>>> I think your modification is good but I am little bit confused >>>> here. In account_shadowed, if >>>> sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write >>>> protected, and this is reasonable. So why >>>> write protecting the gfn of PT_PAGE_TABLE_LEVEL here? >>> >>> Because the shadow page will become 'sync' that means the shadow >>> page will be synced >>> with the page table in guest. So the shadow page need to be >>> write-protected to avoid >>> the guest page table is changed when we do the 'sync' thing. >>> >>> The shadow page need to be write-protected to avoid that guest page >>> table is changed >>> when we are syncing the shadow page table. See kvm_sync_pages() >>> after doing >>> rmap_write_protect(). >> I see. So why are you treat PT_PAGE_TABLE_LEVEL gfn separately here? >> why this cannot be done in >> account_shadowed, as you did for upper level sp? > > non-leaf shadow pages are keepking write-protected which page fault > handler can not fix write > access on it. And leaf shadow pages are not. My point is the original code didn't separate the two cases so I am not sure why you need to separate. Perhaps you want to make account_shadowed imply the non-leaf guest page table is write-protected while leaf page table is not. Thanks, -Kai >> Actually I am thinking whether account_shadowed is >> overdoing things. From it's name it should only *account* shadow sp, >> but now it also does write >> protection and disable large page mapping. >> > > Hmm.. disable large page mapping is already in current code... i think > account_shadowed() can > be understood as new page is taken into account, so protection things > are needed there. > > But I am not good at naming function and also my english is not good > enough, any other better name > is welcome. ;) > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/17/2015 10:44 AM, Kai Huang wrote: > > > On 12/16/2015 04:39 PM, Xiao Guangrong wrote: >> >> >> On 12/16/2015 03:51 PM, Kai Huang wrote: >>> >>> >>> On 12/15/2015 05:10 PM, Xiao Guangrong wrote: >>>> >>>> >>>> On 12/15/2015 03:52 PM, Kai Huang wrote: >>>> >>>>>> static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, >>>>>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, >>>>>> hlist_add_head(&sp->hash_link, >>>>>> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); >>>>>> if (!direct) { >>>>>> - if (rmap_write_protect(vcpu, gfn)) >>>>>> + /* >>>>>> + * we should do write protection before syncing pages >>>>>> + * otherwise the content of the synced shadow page may >>>>>> + * be inconsistent with guest page table. >>>>>> + */ >>>>>> + account_shadowed(vcpu->kvm, sp); >>>>>> + >>>>>> + if (level == PT_PAGE_TABLE_LEVEL && >>>>>> + rmap_write_protect(vcpu, gfn)) >>>>>> kvm_flush_remote_tlbs(vcpu->kvm); >>>>> I think your modification is good but I am little bit confused here. In account_shadowed, if >>>>> sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write protected, and this is reasonable. >>>>> So why >>>>> write protecting the gfn of PT_PAGE_TABLE_LEVEL here? >>>> >>>> Because the shadow page will become 'sync' that means the shadow page will be synced >>>> with the page table in guest. So the shadow page need to be write-protected to avoid >>>> the guest page table is changed when we do the 'sync' thing. >>>> >>>> The shadow page need to be write-protected to avoid that guest page table is changed >>>> when we are syncing the shadow page table. See kvm_sync_pages() after doing >>>> rmap_write_protect(). >>> I see. So why are you treat PT_PAGE_TABLE_LEVEL gfn separately here? why this cannot be done in >>> account_shadowed, as you did for upper level sp? >> >> non-leaf shadow pages are keepking write-protected which page fault handler can not fix write >> access on it. And leaf shadow pages are not. > My point is the original code didn't separate the two cases so I am not sure why you need to > separate. Perhaps you want to make account_shadowed imply the non-leaf guest page table is > write-protected while leaf page table is not. That is why we get improvement after this patchset, we seep up the case for the write access happens on non-leaf page tables. ;) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 6744234..3447dac 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -41,8 +41,16 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, void kvm_page_track_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); +void +kvm_slot_page_track_add_page_nolock(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode); void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn, enum kvm_page_track_mode mode); +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn, + enum kvm_page_track_mode mode); void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn, enum kvm_page_track_mode mode); bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b23f9fc..5a2ca73 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -806,11 +806,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) struct kvm_memory_slot *slot; gfn_t gfn; + kvm->arch.indirect_shadow_pages++; gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); + + /* the non-leaf shadow pages are keeping readonly. */ + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + return kvm_slot_page_track_add_page_nolock(kvm, slot, gfn, + KVM_PAGE_TRACK_WRITE); + kvm_mmu_gfn_disallow_lpage(slot, gfn); - kvm->arch.indirect_shadow_pages++; } static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -819,11 +825,15 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) struct kvm_memory_slot *slot; gfn_t gfn; + kvm->arch.indirect_shadow_pages--; gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + return kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, + KVM_PAGE_TRACK_WRITE); + kvm_mmu_gfn_allow_lpage(slot, gfn); - kvm->arch.indirect_shadow_pages--; } static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, hlist_add_head(&sp->hash_link, &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); if (!direct) { - if (rmap_write_protect(vcpu, gfn)) + /* + * we should do write protection before syncing pages + * otherwise the content of the synced shadow page may + * be inconsistent with guest page table. + */ + account_shadowed(vcpu->kvm, sp); + + if (level == PT_PAGE_TABLE_LEVEL && + rmap_write_protect(vcpu, gfn)) kvm_flush_remote_tlbs(vcpu->kvm); if (level > PT_PAGE_TABLE_LEVEL && need_sync) kvm_sync_pages(vcpu, gfn); - - account_shadowed(vcpu->kvm, sp); } sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; init_shadow_page_table(sp); diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 84420df..87554d3 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -77,6 +77,26 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, WARN_ON(val < 0); } +void +kvm_slot_page_track_add_page_nolock(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode) +{ + WARN_ON(!check_mode(mode)); + + update_gfn_track(slot, gfn, mode, 1); + + /* + * new track stops large page mapping for the + * tracked page. + */ + kvm_mmu_gfn_disallow_lpage(slot, gfn); + + if (mode == KVM_PAGE_TRACK_WRITE) + if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) + kvm_flush_remote_tlbs(kvm); +} + /* * add guest page to the tracking pool so that corresponding access on that * page will be intercepted. @@ -101,21 +121,27 @@ void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn, slot = __gfn_to_memslot(slots, gfn); spin_lock(&kvm->mmu_lock); - update_gfn_track(slot, gfn, mode, 1); - - /* - * new track stops large page mapping for the - * tracked page. - */ - kvm_mmu_gfn_disallow_lpage(slot, gfn); - - if (mode == KVM_PAGE_TRACK_WRITE) - if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) - kvm_flush_remote_tlbs(kvm); + kvm_slot_page_track_add_page_nolock(kvm, slot, gfn, mode); spin_unlock(&kvm->mmu_lock); } } +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn, + enum kvm_page_track_mode mode) +{ + WARN_ON(!check_mode(mode)); + + update_gfn_track(slot, gfn, mode, -1); + + /* + * allow large page mapping for the tracked page + * after the tracker is gone. + */ + kvm_mmu_gfn_allow_lpage(slot, gfn); +} + /* * remove the guest page from the tracking pool which stops the interception * of corresponding access on that page. It is the opposed operation of @@ -134,20 +160,12 @@ void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn, struct kvm_memory_slot *slot; int i; - WARN_ON(!check_mode(mode)); - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { slots = __kvm_memslots(kvm, i); slot = __gfn_to_memslot(slots, gfn); spin_lock(&kvm->mmu_lock); - update_gfn_track(slot, gfn, mode, -1); - - /* - * allow large page mapping for the tracked page - * after the tracker is gone. - */ - kvm_mmu_gfn_allow_lpage(slot, gfn); + kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode); spin_unlock(&kvm->mmu_lock); } }
non-leaf shadow pages are always write protected, it can be the user of page track Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com> --- arch/x86/include/asm/kvm_page_track.h | 8 +++++ arch/x86/kvm/mmu.c | 26 +++++++++++++--- arch/x86/kvm/page_track.c | 58 +++++++++++++++++++++++------------ 3 files changed, 67 insertions(+), 25 deletions(-)