
[1/1] KVM: Fix potential recursive acquisition of the kvm lock

Message ID 1242120729-2280-1-git-send-email-sheng@linux.intel.com (mailing list archive)
State New, archived

Commit Message

Sheng Yang May 12, 2009, 9:32 a.m. UTC
kvm_vm_ioctl_deassign_dev_irq() could recursively acquire kvm->lock, because
it calls kvm_deassign_irq(), which implicitly holds kvm->lock by calling
deassign_host_irq().

Fix it by moving kvm_deassign_irq() out of the critical region, and add the
missing locking to deassign_guest_irq().

Reported-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 virt/kvm/kvm_main.c |   14 +++++++-------
 1 files changed, 7 insertions(+), 7 deletions(-)
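
For readers unfamiliar with the failure mode described in the commit message,
here is a minimal userspace illustration (deliberately not KVM code; all names
are made up for the example) of why re-acquiring a non-recursive mutex from
the same thread deadlocks:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Models deassign_host_irq(), which implicitly takes the same lock again. */
static void inner_path(void)
{
        pthread_mutex_lock(&lock);
        /* ... teardown work ... */
        pthread_mutex_unlock(&lock);
}

/* Models the ioctl path, which already holds the lock when it calls down. */
static void ioctl_path(void)
{
        pthread_mutex_lock(&lock);
        inner_path();           /* second acquisition: the thread blocks on itself */
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        ioctl_path();           /* never returns with a normal (non-recursive) mutex */
        printf("done\n");
        return 0;
}

The patch avoids the equivalent situation in KVM by dropping kvm->lock before
calling into the path that takes it again.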

Comments

Marcelo Tosatti May 12, 2009, 11:55 a.m. UTC | #1
On Tue, May 12, 2009 at 05:32:09PM +0800, Sheng Yang wrote:
> kvm_vm_ioctl_deassign_dev_irq() would potentially recursively get kvm->lock,
> because it called kvm_deassigned_irq() which implicit hold kvm->lock by calling
> deassign_host_irq().
> 
> Fix it by move kvm_deassign_irq() out of critial region. And add the missing
> lock for deassign_guest_irq().
> 
> Reported-by: Alex Williamson <alex.williamson@hp.com>
> Signed-off-by: Sheng Yang <sheng@linux.intel.com>
> ---
>  virt/kvm/kvm_main.c |   14 +++++++-------
>  1 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4d00942..3c69655 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -215,6 +215,8 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
>  static void deassign_guest_irq(struct kvm *kvm,
>  			       struct kvm_assigned_dev_kernel *assigned_dev)
>  {
> +	mutex_lock(&kvm->lock);
> +
>  	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
>  	assigned_dev->ack_notifier.gsi = -1;
>  
> @@ -222,6 +224,8 @@ static void deassign_guest_irq(struct kvm *kvm,
>  		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
>  	assigned_dev->irq_source_id = -1;
>  	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
> +
> +	mutex_unlock(&kvm->lock);
>  }
>  
>  /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
> @@ -558,20 +562,16 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
>  					 struct kvm_assigned_irq
>  					 *assigned_irq)
>  {
> -	int r = -ENODEV;
>  	struct kvm_assigned_dev_kernel *match;
>  
>  	mutex_lock(&kvm->lock);
> -
>  	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
>  				      assigned_irq->assigned_dev_id);
> +	mutex_unlock(&kvm->lock);

assigned_dev list is protected by kvm->lock. So you could have another
ioctl adding to it at the same time you're searching.

Could either have a separate kvm->assigned_devs_lock, to protect
kvm->arch.assigned_dev_head (users are ioctls that manipulate it), or
change the IRQ injection to use a separate spinlock, kill the workqueue
and call kvm_set_irq from the assigned device interrupt handler.
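
A rough userspace model (not kernel code) of the first suggestion, a dedicated
lock for the assigned-device list: the ioctl's lookup then needs only the list
lock, and the later teardown can take the big lock without nesting. All names
and the list handling are illustrative assumptions rather than the real KVM
data structures, and a real fix would also have to keep the device from being
freed between the lookup and the teardown:

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct assigned_dev {
        int id;
        struct assigned_dev *next;
};

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;            /* models kvm->lock */
static pthread_mutex_t assigned_devs_lock = PTHREAD_MUTEX_INITIALIZER;  /* the proposed list lock */
static struct assigned_dev *assigned_dev_head;

/* The lookup takes only the list lock, so it never nests with big_lock. */
static struct assigned_dev *find_assigned_dev(int id)
{
        struct assigned_dev *d;

        pthread_mutex_lock(&assigned_devs_lock);
        for (d = assigned_dev_head; d; d = d->next)
                if (d->id == id)
                        break;
        pthread_mutex_unlock(&assigned_devs_lock);
        return d;
}

static int deassign_dev_irq(int id)
{
        struct assigned_dev *d = find_assigned_dev(id);

        if (!d)
                return -1;

        pthread_mutex_lock(&big_lock);  /* models kvm_deassign_irq() taking kvm->lock */
        /* ... tear down host and guest IRQ state for d ... */
        pthread_mutex_unlock(&big_lock);
        return 0;
}

int main(void)
{
        struct assigned_dev dev = { .id = 1, .next = NULL };

        assigned_dev_head = &dev;
        printf("deassign returned %d\n", deassign_dev_irq(1));
        return 0;
}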

Yang, Sheng May 12, 2009, 2:13 p.m. UTC | #2
On Tuesday 12 May 2009 19:55:24 Marcelo Tosatti wrote:
> On Tue, May 12, 2009 at 05:32:09PM +0800, Sheng Yang wrote:
> > kvm_vm_ioctl_deassign_dev_irq() would potentially recursively get
> > kvm->lock, because it called kvm_deassigned_irq() which implicit hold
> > kvm->lock by calling deassign_host_irq().
> >
> > Fix it by move kvm_deassign_irq() out of critial region. And add the
> > missing lock for deassign_guest_irq().
> >
> > Reported-by: Alex Williamson <alex.williamson@hp.com>
> > Signed-off-by: Sheng Yang <sheng@linux.intel.com>
> > ---
> >  virt/kvm/kvm_main.c |   14 +++++++-------
> >  1 files changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 4d00942..3c69655 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -215,6 +215,8 @@ static void kvm_assigned_dev_ack_irq(struct
> > kvm_irq_ack_notifier *kian) static void deassign_guest_irq(struct kvm
> > *kvm,
> >  			       struct kvm_assigned_dev_kernel *assigned_dev)
> >  {
> > +	mutex_lock(&kvm->lock);
> > +
> >  	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
> >  	assigned_dev->ack_notifier.gsi = -1;
> >
> > @@ -222,6 +224,8 @@ static void deassign_guest_irq(struct kvm *kvm,
> >  		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
> >  	assigned_dev->irq_source_id = -1;
> >  	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
> > +
> > +	mutex_unlock(&kvm->lock);
> >  }
> >
> >  /* The function implicit hold kvm->lock mutex due to cancel_work_sync()
> > */ @@ -558,20 +562,16 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct
> > kvm *kvm, struct kvm_assigned_irq
> >  					 *assigned_irq)
> >  {
> > -	int r = -ENODEV;
> >  	struct kvm_assigned_dev_kernel *match;
> >
> >  	mutex_lock(&kvm->lock);
> > -
> >  	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> >  				      assigned_irq->assigned_dev_id);
> > +	mutex_unlock(&kvm->lock);
>
> assigned_dev list is protected by kvm->lock. So you could have another
> ioctl adding to it at the same time you're searching.

Oh, yes... My fault... 

> Could either have a separate kvm->assigned_devs_lock, to protect
> kvm->arch.assigned_dev_head (users are ioctls that manipulate it), or
> change the IRQ injection to use a separate spinlock, kill the workqueue
> and call kvm_set_irq from the assigned device interrupt handler.

I'd prefer the latter, though it needs more work. But is the only reason for
putting a workqueue here that kvm->lock is a mutex? I can hardly believe it...
If so, I think we made a big mistake - we have had to fix all kinds of race
problems caused by it, only to find in the end that it was unnecessary...

Maybe another reason was kvm_kick_vcpu(), but that has already been fixed by you.

Continuing to check the code...
Marcelo Tosatti May 12, 2009, 2:30 p.m. UTC | #3
On Tue, May 12, 2009 at 10:13:36PM +0800, Yang, Sheng wrote:
> > > +	mutex_unlock(&kvm->lock);
> >
> > assigned_dev list is protected by kvm->lock. So you could have another
> > ioctl adding to it at the same time you're searching.
> 
> Oh, yes... My fault... 
> 
> > Could either have a separate kvm->assigned_devs_lock, to protect
> > kvm->arch.assigned_dev_head (users are ioctls that manipulate it), or
> > change the IRQ injection to use a separate spinlock, kill the workqueue
> > and call kvm_set_irq from the assigned device interrupt handler.
> 
> Peferred the latter, though needs more work. But the only reason for put a 
> workqueue here is because kvm->lock is a mutex? I can't believe... If so, I 
> think we had made a big mistake - we have to fix all kinds of racy problem 
> caused by this, but finally find it's unnecessary... 

One issue is that kvm_set_irq can take too long while interrupts are
blocked, and you'd have to disable interrupts in other contexts that
inject interrupts (say qemu->ioctl(SET_INTERRUPT)->...->), so all I can
see is a tradeoff.

<guess mode on>

But the interrupt injection path seems to be pretty short and efficient
to happen in host interrupt context.

<guess mode off>

Avi, Gleb?
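
To make the second alternative concrete, here is a kernel-style fragment of
what the assigned-device interrupt handler might look like without the
workqueue. This is only a sketch and not compilable on its own: the
irq_inject_lock field is hypothetical, and the exact kvm_set_irq() signature
in the tree at this point is assumed.

/* Sketch: inject directly from the host interrupt handler, serialized by a
 * dedicated spinlock instead of kvm->lock (irq_inject_lock is hypothetical). */
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        unsigned long flags;

        spin_lock_irqsave(&assigned_dev->kvm->irq_inject_lock, flags);
        kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                    assigned_dev->guest_irq, 1);
        spin_unlock_irqrestore(&assigned_dev->kvm->irq_inject_lock, flags);

        return IRQ_HANDLED;
}

This is where the tradeoff above bites: everything kvm_set_irq() touches would
then have to be safe to use with interrupts disabled, and other injection
paths would have to take the same spinlock.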

> Maybe another reason is kvm_kick_vcpu(), but have already fixed by you.

Note you tested the spinlock_irq patch with GigE and there was no
significant performance regression right?

> 
> Continue to check the code...
> 
> -- 
> regards
> Yang, Sheng
Marcelo Tosatti May 12, 2009, 3:59 p.m. UTC | #4
On Tue, May 12, 2009 at 11:30:21AM -0300, Marcelo Tosatti wrote:
> On Tue, May 12, 2009 at 10:13:36PM +0800, Yang, Sheng wrote:
> > > > +	mutex_unlock(&kvm->lock);
> > >
> > > assigned_dev list is protected by kvm->lock. So you could have another
> > > ioctl adding to it at the same time you're searching.
> > 
> > Oh, yes... My fault... 
> > 
> > > Could either have a separate kvm->assigned_devs_lock, to protect
> > > kvm->arch.assigned_dev_head (users are ioctls that manipulate it), or
> > > change the IRQ injection to use a separate spinlock, kill the workqueue
> > > and call kvm_set_irq from the assigned device interrupt handler.
> > 
> > Peferred the latter, though needs more work. But the only reason for put a 
> > workqueue here is because kvm->lock is a mutex? I can't believe... If so, I 
> > think we had made a big mistake - we have to fix all kinds of racy problem 
> > caused by this, but finally find it's unnecessary... 
> 
> One issue is that kvm_set_irq can take too long while interrupts are
> blocked, and you'd have to disable interrupts in other contexes that
> inject interrupts (say qemu->ioctl(SET_INTERRUPT)->...->), so all i can
> see is a tradeoff.

Or multiple kvm_set_irq calls for MSI.

> 
> <guess mode on>
> 
> But the interrupt injection path seems to be pretty short and efficient
> to happen in host interrupt context.
> 
> <guess mode off>
> 
> Avi, Gleb?
> 
> > Maybe another reason is kvm_kick_vcpu(), but have already fixed by you.
> 
> Note you tested the spinlock_irq patch with GigE and there was no
> significant performance regression right?
> 
> > 
> > Continue to check the code...
> > 
> > -- 
> > regards
> > Yang, Sheng
Gleb Natapov May 13, 2009, 11:43 a.m. UTC | #5
On Tue, May 12, 2009 at 11:30:21AM -0300, Marcelo Tosatti wrote:
> On Tue, May 12, 2009 at 10:13:36PM +0800, Yang, Sheng wrote:
> > > > +	mutex_unlock(&kvm->lock);
> > >
> > > assigned_dev list is protected by kvm->lock. So you could have another
> > > ioctl adding to it at the same time you're searching.
> > 
> > Oh, yes... My fault... 
> > 
> > > Could either have a separate kvm->assigned_devs_lock, to protect
> > > kvm->arch.assigned_dev_head (users are ioctls that manipulate it), or
> > > change the IRQ injection to use a separate spinlock, kill the workqueue
> > > and call kvm_set_irq from the assigned device interrupt handler.
> > 
> > Peferred the latter, though needs more work. But the only reason for put a 
> > workqueue here is because kvm->lock is a mutex? I can't believe... If so, I 
> > think we had made a big mistake - we have to fix all kinds of racy problem 
> > caused by this, but finally find it's unnecessary... 
> 
> One issue is that kvm_set_irq can take too long while interrupts are
> blocked, and you'd have to disable interrupts in other contexes that
> inject interrupts (say qemu->ioctl(SET_INTERRUPT)->...->), so all i can
> see is a tradeoff.
> 
> <guess mode on>
> 
> But the interrupt injection path seems to be pretty short and efficient
> to happen in host interrupt context.
> 
> <guess mode off>
> 
> Avi, Gleb?
> 
The interrupt injection path also uses the IRQ routing data structures, so
access to them should be protected by the same lock. And of course, in-kernel
device (apic/ioapic/pic) mmio is done while holding this lock, so interrupt
injection cannot happen in parallel with device reconfiguration. Maybe we
want more parallelism here.

> > Maybe another reason is kvm_kick_vcpu(), but have already fixed by you.
> 
> Note you tested the spinlock_irq patch with GigE and there was no
> significant performance regression right?
> 
> > 
> > Continue to check the code...
> > 
> > -- 
> > regards
> > Yang, Sheng

--
			Gleb.

Patch

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4d00942..3c69655 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -215,6 +215,8 @@  static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
+	mutex_lock(&kvm->lock);
+
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
 	assigned_dev->ack_notifier.gsi = -1;
 
@@ -222,6 +224,8 @@  static void deassign_guest_irq(struct kvm *kvm,
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 	assigned_dev->irq_source_id = -1;
 	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+
+	mutex_unlock(&kvm->lock);
 }
 
 /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
@@ -558,20 +562,16 @@  static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
 					 struct kvm_assigned_irq
 					 *assigned_irq)
 {
-	int r = -ENODEV;
 	struct kvm_assigned_dev_kernel *match;
 
 	mutex_lock(&kvm->lock);
-
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_irq->assigned_dev_id);
+	mutex_unlock(&kvm->lock);
 	if (!match)
-		goto out;
+		return -ENODEV;
 
-	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
-out:
-	mutex_unlock(&kvm->lock);
-	return r;
+	return kvm_deassign_irq(kvm, match, assigned_irq->flags);
 }
 
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,