Message ID | 1455449503-20993-8-git-send-email-guangrong.xiao@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 14/02/2016 12:31, Xiao Guangrong wrote: > Notifier list is introduced so that any node wants to receive the track > event can register to the list > > Two APIs are introduced here: > - kvm_page_track_register_notifier(): register the notifier to receive > track event > > - kvm_page_track_unregister_notifier(): stop receiving track event by > unregister the notifier > > The callback, node->track_write() is called when a write access on the > write tracked page happens > > Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com> > --- > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/include/asm/kvm_page_track.h | 39 ++++++++++++++++++++ > arch/x86/kvm/page_track.c | 67 +++++++++++++++++++++++++++++++++++ > arch/x86/kvm/x86.c | 4 +++ > 4 files changed, 111 insertions(+) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index d8931d0..282bc2f 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -696,6 +696,7 @@ struct kvm_arch { > */ > struct list_head active_mmu_pages; > struct list_head zapped_obsolete_pages; > + struct kvm_page_track_notifier_head track_notifier_head; > > struct list_head assigned_dev_head; > struct iommu_domain *iommu_domain; > diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h > index 97ac9c3..1aae4ef 100644 > --- a/arch/x86/include/asm/kvm_page_track.h > +++ b/arch/x86/include/asm/kvm_page_track.h > @@ -6,6 +6,36 @@ enum kvm_page_track_mode { > KVM_PAGE_TRACK_MAX, > }; > > +/* > + * The notifier represented by @kvm_page_track_notifier_node is linked into > + * the head which will be notified when guest is triggering the track event. > + * > + * Write access on the head is protected by kvm->mmu_lock, read access > + * is protected by track_srcu. 
> + */ > +struct kvm_page_track_notifier_head { > + struct srcu_struct track_srcu; > + struct hlist_head track_notifier_list; > +}; > + > +struct kvm_page_track_notifier_node { > + struct hlist_node node; > + > + /* > + * It is called when guest is writing the write-tracked page > + * and write emulation is finished at that time. > + * > + * @vcpu: the vcpu where the write access happened. > + * @gpa: the physical address written by guest. > + * @new: the data was written to the address. > + * @bytes: the written length. > + */ > + void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, > + int bytes); > +}; > + > +void kvm_page_track_init(struct kvm *kvm); > + > void kvm_page_track_free_memslot(struct kvm_memory_slot *free, > struct kvm_memory_slot *dont); > int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, > @@ -25,4 +55,13 @@ void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn, > enum kvm_page_track_mode mode); > bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn, > enum kvm_page_track_mode mode); > + > +void > +kvm_page_track_register_notifier(struct kvm *kvm, > + struct kvm_page_track_notifier_node *n); > +void > +kvm_page_track_unregister_notifier(struct kvm *kvm, > + struct kvm_page_track_notifier_node *n); > +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, > + int bytes); > #endif > diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c > index de9b32f..0692cc6 100644 > --- a/arch/x86/kvm/page_track.c > +++ b/arch/x86/kvm/page_track.c > @@ -188,3 +188,70 @@ bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn, > > return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); > } > + > +void kvm_page_track_init(struct kvm *kvm) > +{ > + struct kvm_page_track_notifier_head *head; > + > + head = &kvm->arch.track_notifier_head; > + init_srcu_struct(&head->track_srcu); > + INIT_HLIST_HEAD(&head->track_notifier_list); > +} > + > +/* > + * register the notifier so that 
event interception for the tracked guest > + * pages can be received. > + */ > +void > +kvm_page_track_register_notifier(struct kvm *kvm, > + struct kvm_page_track_notifier_node *n) > +{ > + struct kvm_page_track_notifier_head *head; > + > + head = &kvm->arch.track_notifier_head; > + > + spin_lock(&kvm->mmu_lock); > + hlist_add_head_rcu(&n->node, &head->track_notifier_list); > + spin_unlock(&kvm->mmu_lock); > +} > + > +/* > + * stop receiving the event interception. It is the opposed operation of > + * kvm_page_track_register_notifier(). > + */ > +void > +kvm_page_track_unregister_notifier(struct kvm *kvm, > + struct kvm_page_track_notifier_node *n) > +{ > + struct kvm_page_track_notifier_head *head; > + > + head = &kvm->arch.track_notifier_head; > + > + spin_lock(&kvm->mmu_lock); > + hlist_del_rcu(&n->node); > + spin_unlock(&kvm->mmu_lock); > + synchronize_srcu(&head->track_srcu); > +} > + > +/* > + * Notify the node that write access is intercepted and write emulation is > + * finished at this time. > + * > + * The node should figure out if the written page is the one that node is > + * interested in by itself. > + */ > +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, > + int bytes) > +{ > + struct kvm_page_track_notifier_head *head; > + struct kvm_page_track_notifier_node *n; > + int idx; > + > + head = &vcpu->kvm->arch.track_notifier_head; Please check outside SRCU if the notifier list is empty. If so, there is no need to do the (relatively) expensive srcu_read_lock/unlock. 
Paolo > + idx = srcu_read_lock(&head->track_srcu); > + hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) > + if (n->track_write) > + n->track_write(vcpu, gpa, new, bytes); > + srcu_read_unlock(&head->track_srcu, idx); > +} > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index e25ebb7..98019b6 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -4370,6 +4370,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, > if (ret < 0) > return 0; A kvm_vcpu_mark_page_dirty() call is missing here, isn't it? I can take care of it, but it would be great if you double-checked this. If so, that should be fixed in stable kernels too. Can you add a kvm_vcpu_note_page_write(vcpu, gpa, val, bytes) function that takes care of calling kvm_vcpu_mark_page_dirty, kvm_mmu_pte_write and kvm_page_track_write? Thanks, Paolo > kvm_mmu_pte_write(vcpu, gpa, val, bytes); > + kvm_page_track_write(vcpu, gpa, val, bytes); > return 1; > } > > @@ -4628,6 +4629,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, > > kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); > kvm_mmu_pte_write(vcpu, gpa, new, bytes); > + kvm_page_track_write(vcpu, gpa, new, bytes); > > return X86EMUL_CONTINUE; > > @@ -7748,6 +7750,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); > INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); > > + kvm_page_track_init(kvm); > + > return 0; > } > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 02/19/2016 07:51 PM, Paolo Bonzini wrote: > > > On 14/02/2016 12:31, Xiao Guangrong wrote: >> +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, >> + int bytes) >> +{ >> + struct kvm_page_track_notifier_head *head; >> + struct kvm_page_track_notifier_node *n; >> + int idx; >> + >> + head = &vcpu->kvm->arch.track_notifier_head; > > Please check outside SRCU if the notifier list is empty. If so, there > is no need to do the (relatively) expensive srcu_read_lock/unlock. > Sounds good to me. I will call hlist_empty() first, before taking the SRCU read lock. > Paolo > >> + idx = srcu_read_lock(&head->track_srcu); >> + hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) >> + if (n->track_write) >> + n->track_write(vcpu, gpa, new, bytes); >> + srcu_read_unlock(&head->track_srcu, idx); >> +} >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index e25ebb7..98019b6 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -4370,6 +4370,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, >> if (ret < 0) >> return 0; > > A kvm_vcpu_mark_page_dirty is missing here, isn't it? I can take care > of it, but it would be great if you double-checked this. If so, that > should be fixed in stable kernels too. No. It is already handled in emulator_write_phys() -> kvm_vcpu_write_guest() -> kvm_vcpu_write_guest_page() -> __kvm_write_guest_page(). > > Can you add a kvm_vcpu_note_page_write(vcpu, gpa, val, bytes) function > that takes care of calling kvm_vcpu_mark_page_dirty, kvm_mmu_pte_write > and kvm_page_track-write? > After this patchset, kvm_mmu_pte_write is only a static notifier callback called by kvm_page_track_write(). The dirty tracking in emulator_write_phys() is handled in a public API (as explained above), while emulator_cmpxchg_emulated() handles it by itself. So I think it is better to leave dirty tracking to the separate paths, no?
:) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 23/02/2016 05:34, Xiao Guangrong wrote: >> >> A kvm_vcpu_mark_page_dirty is missing here, isn't it? I can take care >> of it, but it would be great if you double-checked this. If so, that >> should be fixed in stable kernels too. > > No. It's already been handled in emulator_write_phys() -> > kvm_vcpu_write_guest() > -> kvm_vcpu_write_guest_page() -> __kvm_write_guest_page(). You're right... >> >> Can you add a kvm_vcpu_note_page_write(vcpu, gpa, val, bytes) function >> that takes care of calling kvm_vcpu_mark_page_dirty, kvm_mmu_pte_write >> and kvm_page_track-write? >> > > After this patchset, kvm_mmu_pte_write is only a static notifier > callback called > by kvm_page_track_write(). > > And the dirty tracking in emulator_write_phys() is handled in a public > API (as my > explanation above), in emulator_cmpxchg_emulated is handled by itself. > So i think > it is better to leaving dirty tracking to the separate paths, no? :) ... and here it is indeed better to leave things as they are in v3. Thanks, Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d8931d0..282bc2f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -696,6 +696,7 @@ struct kvm_arch { */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; + struct kvm_page_track_notifier_head track_notifier_head; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 97ac9c3..1aae4ef 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -6,6 +6,36 @@ enum kvm_page_track_mode { KVM_PAGE_TRACK_MAX, }; +/* + * The notifier represented by @kvm_page_track_notifier_node is linked into + * the head which will be notified when guest is triggering the track event. + * + * Write access on the head is protected by kvm->mmu_lock, read access + * is protected by track_srcu. + */ +struct kvm_page_track_notifier_head { + struct srcu_struct track_srcu; + struct hlist_head track_notifier_list; +}; + +struct kvm_page_track_notifier_node { + struct hlist_node node; + + /* + * It is called when guest is writing the write-tracked page + * and write emulation is finished at that time. + * + * @vcpu: the vcpu where the write access happened. + * @gpa: the physical address written by guest. + * @new: the data was written to the address. + * @bytes: the written length. 
+ */ + void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes); +}; + +void kvm_page_track_init(struct kvm *kvm); + void kvm_page_track_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, @@ -25,4 +55,13 @@ void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn, enum kvm_page_track_mode mode); bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn, enum kvm_page_track_mode mode); + +void +kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void +kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes); #endif diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index de9b32f..0692cc6 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -188,3 +188,70 @@ bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn, return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); } + +void kvm_page_track_init(struct kvm *kvm) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + init_srcu_struct(&head->track_srcu); + INIT_HLIST_HEAD(&head->track_notifier_list); +} + +/* + * register the notifier so that event interception for the tracked guest + * pages can be received. + */ +void +kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + + spin_lock(&kvm->mmu_lock); + hlist_add_head_rcu(&n->node, &head->track_notifier_list); + spin_unlock(&kvm->mmu_lock); +} + +/* + * stop receiving the event interception. It is the opposed operation of + * kvm_page_track_register_notifier(). 
+ */ +void +kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + + spin_lock(&kvm->mmu_lock); + hlist_del_rcu(&n->node); + spin_unlock(&kvm->mmu_lock); + synchronize_srcu(&head->track_srcu); +} + +/* + * Notify the node that write access is intercepted and write emulation is + * finished at this time. + * + * The node should figure out if the written page is the one that node is + * interested in by itself. + */ +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, + int bytes) +{ + struct kvm_page_track_notifier_head *head; + struct kvm_page_track_notifier_node *n; + int idx; + + head = &vcpu->kvm->arch.track_notifier_head; + + idx = srcu_read_lock(&head->track_srcu); + hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) + if (n->track_write) + n->track_write(vcpu, gpa, new, bytes); + srcu_read_unlock(&head->track_srcu, idx); +} diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e25ebb7..98019b6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4370,6 +4370,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, if (ret < 0) return 0; kvm_mmu_pte_write(vcpu, gpa, val, bytes); + kvm_page_track_write(vcpu, gpa, val, bytes); return 1; } @@ -4628,6 +4629,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); kvm_mmu_pte_write(vcpu, gpa, new, bytes); + kvm_page_track_write(vcpu, gpa, new, bytes); return X86EMUL_CONTINUE; @@ -7748,6 +7750,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); + kvm_page_track_init(kvm); + return 0; }
A notifier list is introduced so that any node that wants to receive track events can register on the list. Two APIs are introduced here: - kvm_page_track_register_notifier(): register the notifier to receive track events - kvm_page_track_unregister_notifier(): stop receiving track events by unregistering the notifier. The callback, node->track_write(), is called when a write access to a write-tracked page happens. Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/kvm_page_track.h | 39 ++++++++++++++++++++ arch/x86/kvm/page_track.c | 67 +++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 4 +++ 4 files changed, 111 insertions(+)