Message ID | 20230311002258.852397-11-seanjc@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915/gvt: KVM: KVMGT fixes and page-track cleanups | expand |
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com> On Fri, Mar 10, 2023 at 04:22:41PM -0800, Sean Christopherson wrote: > Use vgpu_lock instead of KVM's mmu_lock to protect accesses to the hash > table used to track which gfns are write-protected when shadowing the > guest's GTT, and hoist the acquisition of vgpu_lock from > intel_vgpu_page_track_handler() out to its sole caller, > kvmgt_page_track_write(). > > This fixes a bug where kvmgt_page_track_write(), which doesn't hold > kvm->mmu_lock, could race with intel_gvt_page_track_remove() and trigger > a use-after-free. > > Fixing kvmgt_page_track_write() by taking kvm->mmu_lock is not an option > as mmu_lock is a r/w spinlock, and intel_vgpu_page_track_handler() might > sleep when acquiring vgpu->cache_lock deep down the callstack: > > intel_vgpu_page_track_handler() > | > |-> page_track->handler / ppgtt_write_protection_handler() > | > |-> ppgtt_handle_guest_write_page_table_bytes() > | > |-> ppgtt_handle_guest_write_page_table() > | > |-> ppgtt_handle_guest_entry_removal() > | > |-> ppgtt_invalidate_pte() > | > |-> intel_gvt_dma_unmap_guest_page() > | > |-> mutex_lock(&vgpu->cache_lock); > > Signed-off-by: Sean Christopherson <seanjc@google.com> > --- > drivers/gpu/drm/i915/gvt/kvmgt.c | 55 +++++++++++++++------------ > drivers/gpu/drm/i915/gvt/page_track.c | 10 +---- > 2 files changed, 33 insertions(+), 32 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c > index 68be66395598..9824d075562e 100644 > --- a/drivers/gpu/drm/i915/gvt/kvmgt.c > +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c > @@ -366,6 +366,8 @@ __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn) > { > struct kvmgt_pgfn *p, *res = NULL; > > + lockdep_assert_held(&info->vgpu_lock); > + > hash_for_each_possible(info->ptable, p, hnode, gfn) { > if (gfn == p->gfn) { > res = p; > @@ -1567,6 +1569,9 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) > if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, 
info->status)) > return -ESRCH; > > + if (kvmgt_gfn_is_write_protected(info, gfn)) > + return 0; > + > idx = srcu_read_lock(&kvm->srcu); > slot = gfn_to_memslot(kvm, gfn); > if (!slot) { > @@ -1575,16 +1580,12 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) > } > > write_lock(&kvm->mmu_lock); > - > - if (kvmgt_gfn_is_write_protected(info, gfn)) > - goto out; > - > kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); > + write_unlock(&kvm->mmu_lock); > + > + srcu_read_unlock(&kvm->srcu, idx); > + > kvmgt_protect_table_add(info, gfn); > - > -out: > - write_unlock(&kvm->mmu_lock); > - srcu_read_unlock(&kvm->srcu, idx); > return 0; > } > > @@ -1597,24 +1598,22 @@ int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) > if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status)) > return -ESRCH; > > - idx = srcu_read_lock(&kvm->srcu); > - slot = gfn_to_memslot(kvm, gfn); > - if (!slot) { > - srcu_read_unlock(&kvm->srcu, idx); > - return -EINVAL; > - } > - > - write_lock(&kvm->mmu_lock); > - > if (!kvmgt_gfn_is_write_protected(info, gfn)) > - goto out; > + return 0; > > + idx = srcu_read_lock(&kvm->srcu); > + slot = gfn_to_memslot(kvm, gfn); > + if (!slot) { > + srcu_read_unlock(&kvm->srcu, idx); > + return -EINVAL; > + } > + > + write_lock(&kvm->mmu_lock); > kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); > + write_unlock(&kvm->mmu_lock); > + srcu_read_unlock(&kvm->srcu, idx); > + > kvmgt_protect_table_del(info, gfn); > - > -out: > - write_unlock(&kvm->mmu_lock); > - srcu_read_unlock(&kvm->srcu, idx); > return 0; > } > > @@ -1625,9 +1624,13 @@ static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, > struct intel_vgpu *info = > container_of(node, struct intel_vgpu, track_node); > > + mutex_lock(&info->vgpu_lock); > + > if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa))) > intel_vgpu_page_track_handler(info, gpa, > (void *)val, len); > + > + mutex_unlock(&info->vgpu_lock); > } > > static 
void kvmgt_page_track_flush_slot(struct kvm *kvm, > @@ -1639,16 +1642,20 @@ static void kvmgt_page_track_flush_slot(struct kvm *kvm, > struct intel_vgpu *info = > container_of(node, struct intel_vgpu, track_node); > > - write_lock(&kvm->mmu_lock); > + mutex_lock(&info->vgpu_lock); > + > for (i = 0; i < slot->npages; i++) { > gfn = slot->base_gfn + i; > if (kvmgt_gfn_is_write_protected(info, gfn)) { > + write_lock(&kvm->mmu_lock); > kvm_slot_page_track_remove_page(kvm, slot, gfn, > KVM_PAGE_TRACK_WRITE); > + write_unlock(&kvm->mmu_lock); > + > kvmgt_protect_table_del(info, gfn); > } > } > - write_unlock(&kvm->mmu_lock); > + mutex_unlock(&info->vgpu_lock); > } > > void intel_vgpu_detach_regions(struct intel_vgpu *vgpu) > diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c > index df34e73cba41..60a65435556d 100644 > --- a/drivers/gpu/drm/i915/gvt/page_track.c > +++ b/drivers/gpu/drm/i915/gvt/page_track.c > @@ -162,13 +162,9 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, > struct intel_vgpu_page_track *page_track; > int ret = 0; > > - mutex_lock(&vgpu->vgpu_lock); > - > page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT); > - if (!page_track) { > - ret = -ENXIO; > - goto out; > - } > + if (!page_track) > + return -ENXIO; > > if (unlikely(vgpu->failsafe)) { > /* Remove write protection to prevent furture traps. */ > @@ -179,7 +175,5 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, > gvt_err("guest page write error, gpa %llx\n", gpa); > } > > -out: > - mutex_unlock(&vgpu->vgpu_lock); > return ret; > } > -- > 2.40.0.rc1.284.g88254d51c5-goog >
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 68be66395598..9824d075562e 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -366,6 +366,8 @@ __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p, *res = NULL; + lockdep_assert_held(&info->vgpu_lock); + hash_for_each_possible(info->ptable, p, hnode, gfn) { if (gfn == p->gfn) { res = p; @@ -1567,6 +1569,9 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status)) return -ESRCH; + if (kvmgt_gfn_is_write_protected(info, gfn)) + return 0; + idx = srcu_read_lock(&kvm->srcu); slot = gfn_to_memslot(kvm, gfn); if (!slot) { @@ -1575,16 +1580,12 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) } write_lock(&kvm->mmu_lock); - - if (kvmgt_gfn_is_write_protected(info, gfn)) - goto out; - kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); + write_unlock(&kvm->mmu_lock); + + srcu_read_unlock(&kvm->srcu, idx); + kvmgt_protect_table_add(info, gfn); - -out: - write_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); return 0; } @@ -1597,24 +1598,22 @@ int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status)) return -ESRCH; - idx = srcu_read_lock(&kvm->srcu); - slot = gfn_to_memslot(kvm, gfn); - if (!slot) { - srcu_read_unlock(&kvm->srcu, idx); - return -EINVAL; - } - - write_lock(&kvm->mmu_lock); - if (!kvmgt_gfn_is_write_protected(info, gfn)) - goto out; + return 0; + idx = srcu_read_lock(&kvm->srcu); + slot = gfn_to_memslot(kvm, gfn); + if (!slot) { + srcu_read_unlock(&kvm->srcu, idx); + return -EINVAL; + } + + write_lock(&kvm->mmu_lock); kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); + write_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); + kvmgt_protect_table_del(info, gfn); - -out: - write_unlock(&kvm->mmu_lock); - 
srcu_read_unlock(&kvm->srcu, idx); return 0; } @@ -1625,9 +1624,13 @@ static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct intel_vgpu *info = container_of(node, struct intel_vgpu, track_node); + mutex_lock(&info->vgpu_lock); + if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa))) intel_vgpu_page_track_handler(info, gpa, (void *)val, len); + + mutex_unlock(&info->vgpu_lock); } static void kvmgt_page_track_flush_slot(struct kvm *kvm, @@ -1639,16 +1642,20 @@ static void kvmgt_page_track_flush_slot(struct kvm *kvm, struct intel_vgpu *info = container_of(node, struct intel_vgpu, track_node); - write_lock(&kvm->mmu_lock); + mutex_lock(&info->vgpu_lock); + for (i = 0; i < slot->npages; i++) { gfn = slot->base_gfn + i; if (kvmgt_gfn_is_write_protected(info, gfn)) { + write_lock(&kvm->mmu_lock); kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); + write_unlock(&kvm->mmu_lock); + kvmgt_protect_table_del(info, gfn); } } - write_unlock(&kvm->mmu_lock); + mutex_unlock(&info->vgpu_lock); } void intel_vgpu_detach_regions(struct intel_vgpu *vgpu) diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c index df34e73cba41..60a65435556d 100644 --- a/drivers/gpu/drm/i915/gvt/page_track.c +++ b/drivers/gpu/drm/i915/gvt/page_track.c @@ -162,13 +162,9 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, struct intel_vgpu_page_track *page_track; int ret = 0; - mutex_lock(&vgpu->vgpu_lock); - page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT); - if (!page_track) { - ret = -ENXIO; - goto out; - } + if (!page_track) + return -ENXIO; if (unlikely(vgpu->failsafe)) { /* Remove write protection to prevent furture traps. */ @@ -179,7 +175,5 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, gvt_err("guest page write error, gpa %llx\n", gpa); } -out: - mutex_unlock(&vgpu->vgpu_lock); return ret; }
Use vgpu_lock instead of KVM's mmu_lock to protect accesses to the hash
table used to track which gfns are write-protected when shadowing the
guest's GTT, and hoist the acquisition of vgpu_lock from
intel_vgpu_page_track_handler() out to its sole caller,
kvmgt_page_track_write().

This fixes a bug where kvmgt_page_track_write(), which doesn't hold
kvm->mmu_lock, could race with intel_gvt_page_track_remove() and trigger
a use-after-free.

Fixing kvmgt_page_track_write() by taking kvm->mmu_lock is not an option
as mmu_lock is a r/w spinlock, and intel_vgpu_page_track_handler() might
sleep when acquiring vgpu->cache_lock deep down the callstack:

    intel_vgpu_page_track_handler()
    |
    |-> page_track->handler / ppgtt_write_protection_handler()
        |
        |-> ppgtt_handle_guest_write_page_table_bytes()
            |
            |-> ppgtt_handle_guest_write_page_table()
                |
                |-> ppgtt_handle_guest_entry_removal()
                    |
                    |-> ppgtt_invalidate_pte()
                        |
                        |-> intel_gvt_dma_unmap_guest_page()
                            |
                            |-> mutex_lock(&vgpu->cache_lock);

Signed-off-by: Sean Christopherson <seanjc@google.com>

---
 drivers/gpu/drm/i915/gvt/kvmgt.c      | 55 +++++++++++++++------------
 drivers/gpu/drm/i915/gvt/page_track.c | 10 +----
 2 files changed, 33 insertions(+), 32 deletions(-)