diff mbox

[4/4] Convert irq notifiers lists to RCU locking.

Message ID 1247400233-24243-5-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived

Commit Message

Gleb Natapov July 12, 2009, 12:03 p.m. UTC
Use RCU locking for mask/ack notifiers lists.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 virt/kvm/irq_comm.c |   20 +++++++++++---------
 1 files changed, 11 insertions(+), 9 deletions(-)

Comments

Michael S. Tsirkin July 13, 2009, 12:56 p.m. UTC | #1
On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> Use RCU locking for mask/ack notifiers lists.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  virt/kvm/irq_comm.c |   20 +++++++++++---------
>  1 files changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 5dde1ef..ba3a115 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>  			break;
>  		}
>  	}
> -	rcu_read_unlock();
>  
> -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>  		if (kian->gsi == gsi)
>  			kian->irq_acked(kian);
> +	rcu_read_unlock();
>  }
>  
>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
>  				   struct kvm_irq_ack_notifier *kian)
>  {
>  	mutex_lock(&kvm->irq_lock);
> -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>  	mutex_unlock(&kvm->irq_lock);

I think we need synchronize_rcu here as well. If the user adds a
notifier, he expects to get notified of irqs immediately
after the function returns, not after the next rcu grace period.
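
For clarity, the change being suggested here would look roughly like the
sketch below (not part of the posted patch), with the grace period waited
for after the notifier is added:

void kvm_register_irq_ack_notifier(struct kvm *kvm,
                                   struct kvm_irq_ack_notifier *kian)
{
        mutex_lock(&kvm->irq_lock);
        hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
        mutex_unlock(&kvm->irq_lock);
        synchronize_rcu();      /* suggested addition: wait for traversals
                                 * that started before the add and so may
                                 * not see the new notifier */
}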

>  }
>  
> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>  				    struct kvm_irq_ack_notifier *kian)
>  {
>  	mutex_lock(&kvm->irq_lock);
> -	hlist_del_init(&kian->link);
> +	hlist_del_init_rcu(&kian->link);
>  	mutex_unlock(&kvm->irq_lock);
> +	synchronize_rcu();
>  }
>  
>  int kvm_request_irq_source_id(struct kvm *kvm)
> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
>  {
>  	mutex_lock(&kvm->irq_lock);
>  	kimn->irq = irq;
> -	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> +	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
>  	mutex_unlock(&kvm->irq_lock);
>  }
>  
> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
>  				      struct kvm_irq_mask_notifier *kimn)
>  {
>  	mutex_lock(&kvm->irq_lock);
> -	hlist_del(&kimn->link);
> +	hlist_del_rcu(&kimn->link);
>  	mutex_unlock(&kvm->irq_lock);
> +	synchronize_rcu();
>  }
>  
>  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>  	struct kvm_irq_mask_notifier *kimn;
>  	struct hlist_node *n;
>  
> -	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> -
> -	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
>  		if (kimn->irq == irq)
>  			kimn->func(kimn, mask);
> +	rcu_read_unlock();
>  }
>  
>  void kvm_free_irq_routing(struct kvm *kvm)
> -- 
> 1.6.2.1
> 
Michael S. Tsirkin July 13, 2009, 1:02 p.m. UTC | #2
On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> Use RCU locking for mask/ack notifiers lists.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  virt/kvm/irq_comm.c |   20 +++++++++++---------
>  1 files changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 5dde1ef..ba3a115 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>  			break;
>  		}
>  	}
> -	rcu_read_unlock();
>  
> -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>  		if (kian->gsi == gsi)
>  			kian->irq_acked(kian);
> +	rcu_read_unlock();
>  }
>  
>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
>  				   struct kvm_irq_ack_notifier *kian)
>  {
>  	mutex_lock(&kvm->irq_lock);
> -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>  	mutex_unlock(&kvm->irq_lock);
>  }
>  
> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>  				    struct kvm_irq_ack_notifier *kian)
>  {
>  	mutex_lock(&kvm->irq_lock);
> -	hlist_del_init(&kian->link);
> +	hlist_del_init_rcu(&kian->link);
>  	mutex_unlock(&kvm->irq_lock);
> +	synchronize_rcu();
>  }

This is done under kvm->lock still, which means the lock might be held
potentially for a very long time. Can synchronize_rcu be moved out of
this lock?
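
Schematically, the shape being asked for is something like the sketch below
(pseudocode only; the surrounding de-assignment path is sketched, not the
actual call chain):

        mutex_lock(&kvm->lock);
        /* ... de-assign the device, unlink its notifiers from the RCU lists ... */
        mutex_unlock(&kvm->lock);
        synchronize_rcu();      /* grace period waited for outside kvm->lock */
        /* ... now free the notifier/device structures ... */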

>  int kvm_request_irq_source_id(struct kvm *kvm)
> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
>  {
>  	mutex_lock(&kvm->irq_lock);
>  	kimn->irq = irq;
> -	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> +	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
>  	mutex_unlock(&kvm->irq_lock);
>  }
>  
> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
>  				      struct kvm_irq_mask_notifier *kimn)
>  {
>  	mutex_lock(&kvm->irq_lock);
> -	hlist_del(&kimn->link);
> +	hlist_del_rcu(&kimn->link);
>  	mutex_unlock(&kvm->irq_lock);
> +	synchronize_rcu();
>  }
>  
>  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>  	struct kvm_irq_mask_notifier *kimn;
>  	struct hlist_node *n;
>  
> -	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> -
> -	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
>  		if (kimn->irq == irq)
>  			kimn->func(kimn, mask);
> +	rcu_read_unlock();
>  }
>  
>  void kvm_free_irq_routing(struct kvm *kvm)
> -- 
> 1.6.2.1
> 
Gleb Natapov July 13, 2009, 1:05 p.m. UTC | #3
On Mon, Jul 13, 2009 at 03:56:40PM +0300, Michael S. Tsirkin wrote:
> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> > Use RCU locking for mask/ack notifiers lists.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  virt/kvm/irq_comm.c |   20 +++++++++++---------
> >  1 files changed, 11 insertions(+), 9 deletions(-)
> > 
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 5dde1ef..ba3a115 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >  			break;
> >  		}
> >  	}
> > -	rcu_read_unlock();
> >  
> > -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> > +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >  		if (kian->gsi == gsi)
> >  			kian->irq_acked(kian);
> > +	rcu_read_unlock();
> >  }
> >  
> >  void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >  				   struct kvm_irq_ack_notifier *kian)
> >  {
> >  	mutex_lock(&kvm->irq_lock);
> > -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> > +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> >  	mutex_unlock(&kvm->irq_lock);
> 
> I think we need synchronize_rcu here as well. If the user adds a
> notifier, he expects to get notified of irqs immediately
> after the function returns, not after the next rcu grace period.
> 
If ack notifier registration races with acknowledgment of the very interrupt
it adds a notifier for, the user is already doing something terribly wrong.

> >  }
> >  
> > @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> >  				    struct kvm_irq_ack_notifier *kian)
> >  {
> >  	mutex_lock(&kvm->irq_lock);
> > -	hlist_del_init(&kian->link);
> > +	hlist_del_init_rcu(&kian->link);
> >  	mutex_unlock(&kvm->irq_lock);
> > +	synchronize_rcu();
> >  }
> >  
> >  int kvm_request_irq_source_id(struct kvm *kvm)
> > @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> >  {
> >  	mutex_lock(&kvm->irq_lock);
> >  	kimn->irq = irq;
> > -	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> > +	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> >  	mutex_unlock(&kvm->irq_lock);
> >  }
> >  
> > @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> >  				      struct kvm_irq_mask_notifier *kimn)
> >  {
> >  	mutex_lock(&kvm->irq_lock);
> > -	hlist_del(&kimn->link);
> > +	hlist_del_rcu(&kimn->link);
> >  	mutex_unlock(&kvm->irq_lock);
> > +	synchronize_rcu();
> >  }
> >  
> >  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >  	struct kvm_irq_mask_notifier *kimn;
> >  	struct hlist_node *n;
> >  
> > -	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> > -
> > -	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> > +	rcu_read_lock();
> > +	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> >  		if (kimn->irq == irq)
> >  			kimn->func(kimn, mask);
> > +	rcu_read_unlock();
> >  }
> >  
> >  void kvm_free_irq_routing(struct kvm *kvm)
> > -- 
> > 1.6.2.1
> > 

--
			Gleb.
Gleb Natapov July 13, 2009, 1:11 p.m. UTC | #4
On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> > Use RCU locking for mask/ack notifiers lists.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  virt/kvm/irq_comm.c |   20 +++++++++++---------
> >  1 files changed, 11 insertions(+), 9 deletions(-)
> > 
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 5dde1ef..ba3a115 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >  			break;
> >  		}
> >  	}
> > -	rcu_read_unlock();
> >  
> > -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> > +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >  		if (kian->gsi == gsi)
> >  			kian->irq_acked(kian);
> > +	rcu_read_unlock();
> >  }
> >  
> >  void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >  				   struct kvm_irq_ack_notifier *kian)
> >  {
> >  	mutex_lock(&kvm->irq_lock);
> > -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> > +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> >  	mutex_unlock(&kvm->irq_lock);
> >  }
> >  
> > @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> >  				    struct kvm_irq_ack_notifier *kian)
> >  {
> >  	mutex_lock(&kvm->irq_lock);
> > -	hlist_del_init(&kian->link);
> > +	hlist_del_init_rcu(&kian->link);
> >  	mutex_unlock(&kvm->irq_lock);
> > +	synchronize_rcu();
> >  }
> 
> This is done under kvm->lock still, which means the lock might be held
> potentially for a very long time. Can synchronize_rcu be moved out of
> this lock?
> 
Only if kvm_free_assigned_device() is moved out of this lock.
Device de-assignment is not a very frequent event though. How long do you
think the lock may be held? KVM RCU read sections are very brief.

> >  int kvm_request_irq_source_id(struct kvm *kvm)
> > @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> >  {
> >  	mutex_lock(&kvm->irq_lock);
> >  	kimn->irq = irq;
> > -	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> > +	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> >  	mutex_unlock(&kvm->irq_lock);
> >  }
> >  
> > @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> >  				      struct kvm_irq_mask_notifier *kimn)
> >  {
> >  	mutex_lock(&kvm->irq_lock);
> > -	hlist_del(&kimn->link);
> > +	hlist_del_rcu(&kimn->link);
> >  	mutex_unlock(&kvm->irq_lock);
> > +	synchronize_rcu();
> >  }
> >  
> >  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >  	struct kvm_irq_mask_notifier *kimn;
> >  	struct hlist_node *n;
> >  
> > -	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> > -
> > -	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> > +	rcu_read_lock();
> > +	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> >  		if (kimn->irq == irq)
> >  			kimn->func(kimn, mask);
> > +	rcu_read_unlock();
> >  }
> >  
> >  void kvm_free_irq_routing(struct kvm *kvm)
> > -- 
> > 1.6.2.1
> > 

--
			Gleb.
Gregory Haskins July 13, 2009, 1:26 p.m. UTC | #5
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
>   
>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
>>     
>>> Use RCU locking for mask/ack notifiers lists.
>>>
>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>> ---
>>>  virt/kvm/irq_comm.c |   20 +++++++++++---------
>>>  1 files changed, 11 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>> index 5dde1ef..ba3a115 100644
>>> --- a/virt/kvm/irq_comm.c
>>> +++ b/virt/kvm/irq_comm.c
>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>>  			break;
>>>  		}
>>>  	}
>>> -	rcu_read_unlock();
>>>  
>>> -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
>>> +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>>>  		if (kian->gsi == gsi)
>>>  			kian->irq_acked(kian);
>>> +	rcu_read_unlock();
>>>  }
>>>  
>>>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
>>>  				   struct kvm_irq_ack_notifier *kian)
>>>  {
>>>  	mutex_lock(&kvm->irq_lock);
>>> -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
>>> +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>>>  	mutex_unlock(&kvm->irq_lock);
>>>  }
>>>  
>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>>>  				    struct kvm_irq_ack_notifier *kian)
>>>  {
>>>  	mutex_lock(&kvm->irq_lock);
>>> -	hlist_del_init(&kian->link);
>>> +	hlist_del_init_rcu(&kian->link);
>>>  	mutex_unlock(&kvm->irq_lock);
>>> +	synchronize_rcu();
>>>  }
>>>       
>> This is done under kvm->lock still, which means the lock might be held
>> potentially for a very long time. Can synchronize_rcu be moved out of
>> this lock?
>>
>>     
> Only if kvm_free_assigned_device() will be moved out of this lock.
> Device de-assignment is not very frequent event though. How long do you
> think it may be held? KVM RCU read sections are very brief.
>   

Note that the delay imposed by the barrier is not only related to the
length of the critical section.  The barrier blocks until the next grace
period, and depending on the type of RCU you are using and your config
options, this could be multiple milliseconds.

I am not saying that this is definitely a problem for your design.   I
am just pointing out that the length of the KVM-RCU read section is only
one of several factors that influence the ultimate duration of your
kvm->lock CS.  IIUC, in an ideally designed subsystem, the read-side CS
will be dwarfed by the length of the grace period, so the grace period (and
barriers against it) is really the critical factor in this type of design.

I am CC'ing Paul in case I am saying something dumb/untrue. ;)

Kind Regards,
-Greg

>   
>>>  int kvm_request_irq_source_id(struct kvm *kvm)
>>> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
>>>  {
>>>  	mutex_lock(&kvm->irq_lock);
>>>  	kimn->irq = irq;
>>> -	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
>>> +	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
>>>  	mutex_unlock(&kvm->irq_lock);
>>>  }
>>>  
>>> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
>>>  				      struct kvm_irq_mask_notifier *kimn)
>>>  {
>>>  	mutex_lock(&kvm->irq_lock);
>>> -	hlist_del(&kimn->link);
>>> +	hlist_del_rcu(&kimn->link);
>>>  	mutex_unlock(&kvm->irq_lock);
>>> +	synchronize_rcu();
>>>  }
>>>  
>>>  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>>> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>>>  	struct kvm_irq_mask_notifier *kimn;
>>>  	struct hlist_node *n;
>>>  
>>> -	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
>>> -
>>> -	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
>>> +	rcu_read_lock();
>>> +	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
>>>  		if (kimn->irq == irq)
>>>  			kimn->func(kimn, mask);
>>> +	rcu_read_unlock();
>>>  }
>>>  
>>>  void kvm_free_irq_routing(struct kvm *kvm)
>>> -- 
>>> 1.6.2.1
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>       
>
> --
> 			Gleb.
>
Michael S. Tsirkin July 13, 2009, 1:29 p.m. UTC | #6
On Mon, Jul 13, 2009 at 04:05:58PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 03:56:40PM +0300, Michael S. Tsirkin wrote:
> > On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> > > Use RCU locking for mask/ack notifiers lists.
> > > 
> > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > ---
> > >  virt/kvm/irq_comm.c |   20 +++++++++++---------
> > >  1 files changed, 11 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > index 5dde1ef..ba3a115 100644
> > > --- a/virt/kvm/irq_comm.c
> > > +++ b/virt/kvm/irq_comm.c
> > > @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > >  			break;
> > >  		}
> > >  	}
> > > -	rcu_read_unlock();
> > >  
> > > -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> > > +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> > >  		if (kian->gsi == gsi)
> > >  			kian->irq_acked(kian);
> > > +	rcu_read_unlock();
> > >  }
> > >  
> > >  void kvm_register_irq_ack_notifier(struct kvm *kvm,
> > >  				   struct kvm_irq_ack_notifier *kian)
> > >  {
> > >  	mutex_lock(&kvm->irq_lock);
> > > -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> > > +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> > >  	mutex_unlock(&kvm->irq_lock);
> > 
> > I think we need synchronize_rcu here as well. If the user adds a
> > notifier, he expects to get notified of irqs immediately
> > after the function returns, not after the next rcu grace period.
> > 
> If ack notifier registration races with acknowledgment of the interrupt
> it adds notifier for a user already does something terribly wrong.

Hmm, good point.

> > >  }
> > >  
> > > @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> > >  				    struct kvm_irq_ack_notifier *kian)
> > >  {
> > >  	mutex_lock(&kvm->irq_lock);
> > > -	hlist_del_init(&kian->link);
> > > +	hlist_del_init_rcu(&kian->link);
> > >  	mutex_unlock(&kvm->irq_lock);
> > > +	synchronize_rcu();
> > >  }
> > >  
> > >  int kvm_request_irq_source_id(struct kvm *kvm)
> > > @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> > >  {
> > >  	mutex_lock(&kvm->irq_lock);
> > >  	kimn->irq = irq;
> > > -	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> > > +	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> > >  	mutex_unlock(&kvm->irq_lock);
> > >  }
> > >  
> > > @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> > >  				      struct kvm_irq_mask_notifier *kimn)
> > >  {
> > >  	mutex_lock(&kvm->irq_lock);
> > > -	hlist_del(&kimn->link);
> > > +	hlist_del_rcu(&kimn->link);
> > >  	mutex_unlock(&kvm->irq_lock);
> > > +	synchronize_rcu();
> > >  }
> > >  
> > >  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > > @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > >  	struct kvm_irq_mask_notifier *kimn;
> > >  	struct hlist_node *n;
> > >  
> > > -	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> > > -
> > > -	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> > > +	rcu_read_lock();
> > > +	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> > >  		if (kimn->irq == irq)
> > >  			kimn->func(kimn, mask);
> > > +	rcu_read_unlock();
> > >  }
> > >  
> > >  void kvm_free_irq_routing(struct kvm *kvm)
> > > -- 
> > > 1.6.2.1
> > > 
> 
> --
> 			Gleb.
Gleb Natapov July 13, 2009, 1:32 p.m. UTC | #7
On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
> >   
> >> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> >>     
> >>> Use RCU locking for mask/ack notifiers lists.
> >>>
> >>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>> ---
> >>>  virt/kvm/irq_comm.c |   20 +++++++++++---------
> >>>  1 files changed, 11 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> >>> index 5dde1ef..ba3a115 100644
> >>> --- a/virt/kvm/irq_comm.c
> >>> +++ b/virt/kvm/irq_comm.c
> >>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >>>  			break;
> >>>  		}
> >>>  	}
> >>> -	rcu_read_unlock();
> >>>  
> >>> -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> >>> +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >>>  		if (kian->gsi == gsi)
> >>>  			kian->irq_acked(kian);
> >>> +	rcu_read_unlock();
> >>>  }
> >>>  
> >>>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >>>  				   struct kvm_irq_ack_notifier *kian)
> >>>  {
> >>>  	mutex_lock(&kvm->irq_lock);
> >>> -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> >>> +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> >>>  	mutex_unlock(&kvm->irq_lock);
> >>>  }
> >>>  
> >>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> >>>  				    struct kvm_irq_ack_notifier *kian)
> >>>  {
> >>>  	mutex_lock(&kvm->irq_lock);
> >>> -	hlist_del_init(&kian->link);
> >>> +	hlist_del_init_rcu(&kian->link);
> >>>  	mutex_unlock(&kvm->irq_lock);
> >>> +	synchronize_rcu();
> >>>  }
> >>>       
> >> This is done under kvm->lock still, which means the lock might be held
> >> potentially for a very long time. Can synchronize_rcu be moved out of
> >> this lock?
> >>
> >>     
> > Only if kvm_free_assigned_device() will be moved out of this lock.
> > Device de-assignment is not very frequent event though. How long do you
> > think it may be held? KVM RCU read sections are very brief.
> >   
> 
> Note that the delay imposed by the barrier is not only related to the
> length of the critical section.  The barrier blocks until the next grace
> period, and depending on the type of RCU you are using and your config
> options, this could be multiple milliseconds.
> 
> I am not saying that this is definitely a problem for your design.   I
> am just pointing out that the length of the KVM-RCU read section is only
Yeah, I understand that other RCU read sections may introduce delays too.
The question is how big the delay may be. I don't think a multiple-millisecond
delay in device de-assignment is a big issue though.

> one of several factors that influence the ultimate duration of your
> kvm->lock CS.  IIUC, in an ideally designed subsystem, the read-side CS
> will be dwarfed by the length of the grace, so the grace (and barriers
> against it) are really the critical factor of this type of design.
> 
> I am CC'ing Paul in case I am saying something dumb/untrue. ;)
> 
> Kind Regards,
> -Greg
> 
> >   
> >>>  int kvm_request_irq_source_id(struct kvm *kvm)
> >>> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> >>>  {
> >>>  	mutex_lock(&kvm->irq_lock);
> >>>  	kimn->irq = irq;
> >>> -	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> >>> +	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> >>>  	mutex_unlock(&kvm->irq_lock);
> >>>  }
> >>>  
> >>> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> >>>  				      struct kvm_irq_mask_notifier *kimn)
> >>>  {
> >>>  	mutex_lock(&kvm->irq_lock);
> >>> -	hlist_del(&kimn->link);
> >>> +	hlist_del_rcu(&kimn->link);
> >>>  	mutex_unlock(&kvm->irq_lock);
> >>> +	synchronize_rcu();
> >>>  }
> >>>  
> >>>  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >>> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >>>  	struct kvm_irq_mask_notifier *kimn;
> >>>  	struct hlist_node *n;
> >>>  
> >>> -	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> >>> -
> >>> -	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> >>> +	rcu_read_lock();
> >>> +	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> >>>  		if (kimn->irq == irq)
> >>>  			kimn->func(kimn, mask);
> >>> +	rcu_read_unlock();
> >>>  }
> >>>  
> >>>  void kvm_free_irq_routing(struct kvm *kvm)
> >>> -- 
> >>> 1.6.2.1
> >>>
> >>>       
> >
> > --
> > 			Gleb.
> >   
> 
> 



--
			Gleb.
Gregory Haskins July 13, 2009, 1:40 p.m. UTC | #8
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
>   
>> Gleb Natapov wrote:
>>     
>>> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
>>>   
>>>       
>>>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
>>>>     
>>>>         
>>>>> Use RCU locking for mask/ack notifiers lists.
>>>>>
>>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>>>> ---
>>>>>  virt/kvm/irq_comm.c |   20 +++++++++++---------
>>>>>  1 files changed, 11 insertions(+), 9 deletions(-)
>>>>>
>>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>>>> index 5dde1ef..ba3a115 100644
>>>>> --- a/virt/kvm/irq_comm.c
>>>>> +++ b/virt/kvm/irq_comm.c
>>>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>>>>  			break;
>>>>>  		}
>>>>>  	}
>>>>> -	rcu_read_unlock();
>>>>>  
>>>>> -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
>>>>> +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>>>>>  		if (kian->gsi == gsi)
>>>>>  			kian->irq_acked(kian);
>>>>> +	rcu_read_unlock();
>>>>>  }
>>>>>  
>>>>>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
>>>>>  				   struct kvm_irq_ack_notifier *kian)
>>>>>  {
>>>>>  	mutex_lock(&kvm->irq_lock);
>>>>> -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
>>>>> +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>>>>>  	mutex_unlock(&kvm->irq_lock);
>>>>>  }
>>>>>  
>>>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>>>>>  				    struct kvm_irq_ack_notifier *kian)
>>>>>  {
>>>>>  	mutex_lock(&kvm->irq_lock);
>>>>> -	hlist_del_init(&kian->link);
>>>>> +	hlist_del_init_rcu(&kian->link);
>>>>>  	mutex_unlock(&kvm->irq_lock);
>>>>> +	synchronize_rcu();
>>>>>  }
>>>>>       
>>>>>           
>>>> This is done under kvm->lock still, which means the lock might be held
>>>> potentially for a very long time. Can synchronize_rcu be moved out of
>>>> this lock?
>>>>
>>>>     
>>>>         
>>> Only if kvm_free_assigned_device() will be moved out of this lock.
>>> Device de-assignment is not very frequent event though. How long do you
>>> think it may be held? KVM RCU read sections are very brief.
>>>   
>>>       
>> Note that the delay imposed by the barrier is not only related to the
>> length of the critical section.  The barrier blocks until the next grace
>> period, and depending on the type of RCU you are using and your config
>> options, this could be multiple milliseconds.
>>
>> I am not saying that this is definitely a problem for your design.   I
>> am just pointing out that the length of the KVM-RCU read section is only
>>     
> Yeah I understand that other RCU read section may introduce delays too.
> The question is how big the delay may be.

I think you are misunderstanding me.  The read-side CS is not a
significant factor here so I am not worried about concurrent read-side
CS causing a longer delay.  What I am saying is that the grace period of
your RCU subsystem is the dominant factor in the equation here, and this
may be several milliseconds.

>  I don't think multiple
> milliseconds delay in device de-assignment is a big issue though.
>   

I would tend to agree with you.  It's not fast path.

I only brought this up because I saw your design being justified
incorrectly:  you said "KVM RCU read sections are very brief", but that
is not really relevant to Michael's point.  I just want to make sure
that the true impact is understood.

Kind Regards,
-Greg
Michael S. Tsirkin July 13, 2009, 1:40 p.m. UTC | #9
On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> Yeah I understand that other RCU read section may introduce delays too.
> The question is how big the delay may be.

I recall seeing the number "at least 3 jiffies" somewhere, but that
might have changed since.

> I don't think multiple
> milliseconds delay in device de-assignment is a big issue though.

Right. My point was that since the sync is done under the kvm lock, the
guest can easily get blocked trying to take the kvm lock meanwhile.
Gregory Haskins July 13, 2009, 1:44 p.m. UTC | #10
Michael S. Tsirkin wrote:
> On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
>   
>> Yeah I understand that other RCU read section may introduce delays too.
>> The question is how big the delay may be.
>>     
>
> I recall seeing the number "at least 3 jiffies" somewhere, but that
> might have changed since.
>   
I think it depends on how you configure your kernel... but that sounds
like the right ballpark for one of the worst-case configurations, IIRC.

>   
>> I don't think multiple
>> milliseconds delay in device de-assignment is a big issue though.
>>     
>
> Right. My point was that since the sync is done under kvm lock, the
> guest can easily get blocked trying to get kvm lock meanwhile.
>   

Yeah, that is really the biggest issue here, I agree.  The delay in the
actual deassignment path is NBD.

Kind Regards,
-Greg
Gregory Haskins July 13, 2009, 1:48 p.m. UTC | #11
Michael S. Tsirkin wrote:
> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
>   
>> Use RCU locking for mask/ack notifiers lists.
>>
>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>> ---
>>  virt/kvm/irq_comm.c |   20 +++++++++++---------
>>  1 files changed, 11 insertions(+), 9 deletions(-)
>>
>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>> index 5dde1ef..ba3a115 100644
>> --- a/virt/kvm/irq_comm.c
>> +++ b/virt/kvm/irq_comm.c
>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>  			break;
>>  		}
>>  	}
>> -	rcu_read_unlock();
>>  
>> -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
>> +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>>  		if (kian->gsi == gsi)
>>  			kian->irq_acked(kian);
>> +	rcu_read_unlock();
>>  }
>>  
>>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
>>  				   struct kvm_irq_ack_notifier *kian)
>>  {
>>  	mutex_lock(&kvm->irq_lock);
>> -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
>> +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>>  	mutex_unlock(&kvm->irq_lock);
>>     
>
> I think we need synchronize_rcu here as well. If the user adds a
> notifier, he expects to get notified of irqs immediately
> after the function returns, not after the next rcu grace period.
>   

Agreed.
>   
>>  }
>>  
>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>>  				    struct kvm_irq_ack_notifier *kian)
>>  {
>>  	mutex_lock(&kvm->irq_lock);
>> -	hlist_del_init(&kian->link);
>> +	hlist_del_init_rcu(&kian->link);
>>  	mutex_unlock(&kvm->irq_lock);
>> +	synchronize_rcu();
>>  }
>>  
>>  int kvm_request_irq_source_id(struct kvm *kvm)
>> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
>>  {
>>  	mutex_lock(&kvm->irq_lock);
>>  	kimn->irq = irq;
>> -	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
>> +	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
>>  	mutex_unlock(&kvm->irq_lock);
>>  }
>>  
>> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
>>  				      struct kvm_irq_mask_notifier *kimn)
>>  {
>>  	mutex_lock(&kvm->irq_lock);
>> -	hlist_del(&kimn->link);
>> +	hlist_del_rcu(&kimn->link);
>>  	mutex_unlock(&kvm->irq_lock);
>> +	synchronize_rcu();
>>  }
>>  
>>  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>>  	struct kvm_irq_mask_notifier *kimn;
>>  	struct hlist_node *n;
>>  
>> -	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
>> -
>> -	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
>> +	rcu_read_lock();
>> +	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
>>  		if (kimn->irq == irq)
>>  			kimn->func(kimn, mask);
>> +	rcu_read_unlock();
>>  }
>>  
>>  void kvm_free_irq_routing(struct kvm *kvm)
>> -- 
>> 1.6.2.1
>>
>
Gleb Natapov July 13, 2009, 1:52 p.m. UTC | #12
On Mon, Jul 13, 2009 at 09:40:01AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
> >   
> >> Gleb Natapov wrote:
> >>     
> >>> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
> >>>   
> >>>       
> >>>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> >>>>     
> >>>>         
> >>>>> Use RCU locking for mask/ack notifiers lists.
> >>>>>
> >>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>>>> ---
> >>>>>  virt/kvm/irq_comm.c |   20 +++++++++++---------
> >>>>>  1 files changed, 11 insertions(+), 9 deletions(-)
> >>>>>
> >>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> >>>>> index 5dde1ef..ba3a115 100644
> >>>>> --- a/virt/kvm/irq_comm.c
> >>>>> +++ b/virt/kvm/irq_comm.c
> >>>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >>>>>  			break;
> >>>>>  		}
> >>>>>  	}
> >>>>> -	rcu_read_unlock();
> >>>>>  
> >>>>> -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> >>>>> +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >>>>>  		if (kian->gsi == gsi)
> >>>>>  			kian->irq_acked(kian);
> >>>>> +	rcu_read_unlock();
> >>>>>  }
> >>>>>  
> >>>>>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >>>>>  				   struct kvm_irq_ack_notifier *kian)
> >>>>>  {
> >>>>>  	mutex_lock(&kvm->irq_lock);
> >>>>> -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> >>>>> +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> >>>>>  	mutex_unlock(&kvm->irq_lock);
> >>>>>  }
> >>>>>  
> >>>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> >>>>>  				    struct kvm_irq_ack_notifier *kian)
> >>>>>  {
> >>>>>  	mutex_lock(&kvm->irq_lock);
> >>>>> -	hlist_del_init(&kian->link);
> >>>>> +	hlist_del_init_rcu(&kian->link);
> >>>>>  	mutex_unlock(&kvm->irq_lock);
> >>>>> +	synchronize_rcu();
> >>>>>  }
> >>>>>       
> >>>>>           
> >>>> This is done under kvm->lock still, which means the lock might be held
> >>>> potentially for a very long time. Can synchronize_rcu be moved out of
> >>>> this lock?
> >>>>
> >>>>     
> >>>>         
> >>> Only if kvm_free_assigned_device() will be moved out of this lock.
> >>> Device de-assignment is not very frequent event though. How long do you
> >>> think it may be held? KVM RCU read sections are very brief.
> >>>   
> >>>       
> >> Note that the delay imposed by the barrier is not only related to the
> >> length of the critical section.  The barrier blocks until the next grace
> >> period, and depending on the type of RCU you are using and your config
> >> options, this could be multiple milliseconds.
> >>
> >> I am not saying that this is definitely a problem for your design.   I
> >> am just pointing out that the length of the KVM-RCU read section is only
> >>     
> > Yeah I understand that other RCU read section may introduce delays too.
> > The question is how big the delay may be.
> 
> I think you are misunderstanding me.  The read-side CS is not a
> significant factor here so I am not worried about concurrent read-side
> CS causing a longer delay.  What I am saying is that the grace period of
> your RCU subsystem is the dominant factor in the equation here, and this
> may be several milliseconds.
> 
How is the "grace period" determined? Doesn't it just mean "no CPU is
in an RCU read section anymore"?

> >  I don't think multiple
> > milliseconds delay in device de-assignment is a big issue though.
> >   
> 
> I would tend to agree with you.  It's not fast path.
> 
> I only brought this up because I saw your design being justified
> incorrectly:  you said "KVM RCU read sections are very brief", but that
> is not really relevant to Michael's point.  I just want to make sure
> that the true impact is understood.
> 
> Kind Regards,
> -Greg
> 
> 



--
			Gleb.
Gregory Haskins July 13, 2009, 2:02 p.m. UTC | #13
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:40:01AM -0400, Gregory Haskins wrote:
>   
>> Gleb Natapov wrote:
>>     
>>> On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
>>>   
>>>       
>>>> Gleb Natapov wrote:
>>>>     
>>>>         
>>>>> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
>>>>>   
>>>>>       
>>>>>           
>>>>>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
>>>>>>     
>>>>>>         
>>>>>>             
>>>>>>> Use RCU locking for mask/ack notifiers lists.
>>>>>>>
>>>>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>>>>>> ---
>>>>>>>  virt/kvm/irq_comm.c |   20 +++++++++++---------
>>>>>>>  1 files changed, 11 insertions(+), 9 deletions(-)
>>>>>>>
>>>>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>>>>>> index 5dde1ef..ba3a115 100644
>>>>>>> --- a/virt/kvm/irq_comm.c
>>>>>>> +++ b/virt/kvm/irq_comm.c
>>>>>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>>>>>>  			break;
>>>>>>>  		}
>>>>>>>  	}
>>>>>>> -	rcu_read_unlock();
>>>>>>>  
>>>>>>> -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
>>>>>>> +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>>>>>>>  		if (kian->gsi == gsi)
>>>>>>>  			kian->irq_acked(kian);
>>>>>>> +	rcu_read_unlock();
>>>>>>>  }
>>>>>>>  
>>>>>>>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
>>>>>>>  				   struct kvm_irq_ack_notifier *kian)
>>>>>>>  {
>>>>>>>  	mutex_lock(&kvm->irq_lock);
>>>>>>> -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
>>>>>>> +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>>>>>>>  	mutex_unlock(&kvm->irq_lock);
>>>>>>>  }
>>>>>>>  
>>>>>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>>>>>>>  				    struct kvm_irq_ack_notifier *kian)
>>>>>>>  {
>>>>>>>  	mutex_lock(&kvm->irq_lock);
>>>>>>> -	hlist_del_init(&kian->link);
>>>>>>> +	hlist_del_init_rcu(&kian->link);
>>>>>>>  	mutex_unlock(&kvm->irq_lock);
>>>>>>> +	synchronize_rcu();
>>>>>>>  }
>>>>>>>       
>>>>>>>           
>>>>>>>               
>>>>>> This is done under kvm->lock still, which means the lock might be held
>>>>>> potentially for a very long time. Can synchronize_rcu be moved out of
>>>>>> this lock?
>>>>>>
>>>>>>     
>>>>>>         
>>>>>>             
>>>>> Only if kvm_free_assigned_device() will be moved out of this lock.
>>>>> Device de-assignment is not very frequent event though. How long do you
>>>>> think it may be held? KVM RCU read sections are very brief.
>>>>>   
>>>>>       
>>>>>           
>>>> Note that the delay imposed by the barrier is not only related to the
>>>> length of the critical section.  The barrier blocks until the next grace
>>>> period, and depending on the type of RCU you are using and your config
>>>> options, this could be multiple milliseconds.
>>>>
>>>> I am not saying that this is definitely a problem for your design.   I
>>>> am just pointing out that the length of the KVM-RCU read section is only
>>>>     
>>>>         
>>> Yeah I understand that other RCU read section may introduce delays too.
>>> The question is how big the delay may be.
>>>       
>> I think you are misunderstanding me.  The read-side CS is not a
>> significant factor here so I am not worried about concurrent read-side
>> CS causing a longer delay.  What I am saying is that the grace period of
>> your RCU subsystem is the dominant factor in the equation here, and this
>> may be several milliseconds.
>>
>>     
> How is the "grace period" is determined? Isn't it just means "no cpus is
> in RCU read section anymore"?
>   

Nope ;)

RCU is pretty complex, so I won't even try to explain it here as there
are numerous articles floating around out there that do a much better job.

But here is a summary:  RCU buys you two things: 1) concurrent readers
*and* writers, and 2) a much lower overhead reader path because it
generally doesn't use atomic operations.  It's point (2) that is relevant here.

If taking an atomic were ok, you could approximate the RCU model using
reference counting.  Reference counting buys you "precise" resource
acquisition/release at the expense of the overhead of the atomic
operation (and any associated cache-line bouncing).  RCU uses an
"imprecise" model where we don't really know the *exact* moment the
resource is released.  Instead, there are specific boundaries in time
when we can guarantee that it had to have been released prior to the
expiry of the event.  This event is what is called the "grace period".

So that is what synchronize_rcu() is doing.  It's a barrier to the next
imprecise moment in time when we can be assured (if you used the rest of
the RCU API properly) that there can not be any outstanding references
to your object left in flight.  Each grace period can be milliseconds,
depending on what version of the kernel you have and how it is configured.
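
To make that concrete, here is a minimal generic sketch of the pattern
(illustrative only, not KVM code; 'foo' and the helpers are made up):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        int val;
};

static struct foo *global_foo;  /* updates serialized by some external lock */

/* reader side: cheap, no atomics, delimited by rcu_read_lock()/unlock() */
static int read_foo(void)
{
        struct foo *p;
        int val = 0;

        rcu_read_lock();
        p = rcu_dereference(global_foo);
        if (p)
                val = p->val;
        rcu_read_unlock();
        return val;
}

/* update side: publish the new version, then wait out a grace period
 * before freeing the old one -- that wait is synchronize_rcu() */
static void update_foo(struct foo *new)
{
        struct foo *old = global_foo;

        rcu_assign_pointer(global_foo, new);
        synchronize_rcu();      /* no reader can still be using 'old' after this */
        kfree(old);
}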

HTH

Kind Regards,
-Greg

>   
>>>  I don't think multiple
>>> milliseconds delay in device de-assignment is a big issue though.
>>>   
>>>       
>> I would tend to agree with you.  It's not fast path.
>>
>> I only brought this up because I saw your design being justified
>> incorrectly:  you said "KVM RCU read sections are very brief", but that
>> is not really relevant to Michael's point.  I just want to make sure
>> that the true impact is understood.
>>
>> Kind Regards,
>> -Greg
>>
>>
>>     
>
>
>
> --
> 			Gleb.
>
Gleb Natapov July 13, 2009, 2:08 p.m. UTC | #14
On Mon, Jul 13, 2009 at 10:02:13AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 09:40:01AM -0400, Gregory Haskins wrote:
> >   
> >> Gleb Natapov wrote:
> >>     
> >>> On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
> >>>   
> >>>       
> >>>> Gleb Natapov wrote:
> >>>>     
> >>>>         
> >>>>> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
> >>>>>   
> >>>>>       
> >>>>>           
> >>>>>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> >>>>>>     
> >>>>>>         
> >>>>>>             
> >>>>>>> Use RCU locking for mask/ack notifiers lists.
> >>>>>>>
> >>>>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>>>>>> ---
> >>>>>>>  virt/kvm/irq_comm.c |   20 +++++++++++---------
> >>>>>>>  1 files changed, 11 insertions(+), 9 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> >>>>>>> index 5dde1ef..ba3a115 100644
> >>>>>>> --- a/virt/kvm/irq_comm.c
> >>>>>>> +++ b/virt/kvm/irq_comm.c
> >>>>>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >>>>>>>  			break;
> >>>>>>>  		}
> >>>>>>>  	}
> >>>>>>> -	rcu_read_unlock();
> >>>>>>>  
> >>>>>>> -	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> >>>>>>> +	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >>>>>>>  		if (kian->gsi == gsi)
> >>>>>>>  			kian->irq_acked(kian);
> >>>>>>> +	rcu_read_unlock();
> >>>>>>>  }
> >>>>>>>  
> >>>>>>>  void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >>>>>>>  				   struct kvm_irq_ack_notifier *kian)
> >>>>>>>  {
> >>>>>>>  	mutex_lock(&kvm->irq_lock);
> >>>>>>> -	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> >>>>>>> +	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> >>>>>>>  	mutex_unlock(&kvm->irq_lock);
> >>>>>>>  }
> >>>>>>>  
> >>>>>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> >>>>>>>  				    struct kvm_irq_ack_notifier *kian)
> >>>>>>>  {
> >>>>>>>  	mutex_lock(&kvm->irq_lock);
> >>>>>>> -	hlist_del_init(&kian->link);
> >>>>>>> +	hlist_del_init_rcu(&kian->link);
> >>>>>>>  	mutex_unlock(&kvm->irq_lock);
> >>>>>>> +	synchronize_rcu();
> >>>>>>>  }
> >>>>>>>       
> >>>>>>>           
> >>>>>>>               
> >>>>>> This is done under kvm->lock still, which means the lock might be held
> >>>>>> potentially for a very long time. Can synchronize_rcu be moved out of
> >>>>>> this lock?
> >>>>>>
> >>>>>>     
> >>>>>>         
> >>>>>>             
> >>>>> Only if kvm_free_assigned_device() will be moved out of this lock.
> >>>>> Device de-assignment is not very frequent event though. How long do you
> >>>>> think it may be held? KVM RCU read sections are very brief.
> >>>>>   
> >>>>>       
> >>>>>           
> >>>> Note that the delay imposed by the barrier is not only related to the
> >>>> length of the critical section.  The barrier blocks until the next grace
> >>>> period, and depending on the type of RCU you are using and your config
> >>>> options, this could be multiple milliseconds.
> >>>>
> >>>> I am not saying that this is definitely a problem for your design.   I
> >>>> am just pointing out that the length of the KVM-RCU read section is only
> >>>>     
> >>>>         
> >>> Yeah I understand that other RCU read section may introduce delays too.
> >>> The question is how big the delay may be.
> >>>       
> >> I think you are misunderstanding me.  The read-side CS is not a
> >> significant factor here so I am not worried about concurrent read-side
> >> CS causing a longer delay.  What I am saying is that the grace period of
> >> your RCU subsystem is the dominant factor in the equation here, and this
> >> may be several milliseconds.
> >>
> >>     
> > How is the "grace period" is determined? Isn't it just means "no cpus is
> > in RCU read section anymore"?
> >   
> 
> Nope ;)
> 
Now I recall something about each CPU passing through the scheduler. Thanks.

> RCU is pretty complex, so I won't even try to explain it here as there
> are numerous articles floating around out there that do a much better job.
> 
> But here is a summary:  RCU buys you two things: 1) concurrent readers
> *and* writers, and 2) a much lower overhead reader path because it
> generally doesn't use atomic.  Its point (2) that is relevant here.
> 
> If taking an atomic were ok, you could approximate the RCU model using
> reference counting.  Reference counting buys you "precise" resource
> acquistion/release at the expense of the overhead of the atomic
> operation (and any associated cache-line bouncing).  RCU uses a
> "imprecise" model where we don't really know the *exact* moment the
> resource is released.  Instead, there are specific boundaries in time
> when we can guarantee that it had to have been released prior to the
> expiry of the event.  This event is what is called the "grace period".
> 
> So that is what synchronize_rcu() is doing.  Its a barrier to the next
> imprecise moment in time when we can be assured (if you used the rest of
> the RCU API properly) that there can not be any outstanding references
> to your object left in flight.  Each grace period can be milliseconds,
> depending on what version of the kernel you have and how it is configured.
> 
> HTH
> 
> Kind Regards,
> -Greg
> 
> >   
> >>>  I don't think multiple
> >>> milliseconds delay in device de-assignment is a big issue though.
> >>>   
> >>>       
> >> I would tend to agree with you.  It's not fast path.
> >>
> >> I only brought this up because I saw your design being justified
> >> incorrectly:  you said "KVM RCU read sections are very brief", but that
> >> is not really relevant to Michael's point.  I just want to make sure
> >> that the true impact is understood.
> >>
> >> Kind Regards,
> >> -Greg
> >>
> >>
> >>     
> >
> >
> >
> > --
> > 			Gleb.
> >   
> 
> 



--
			Gleb.
Paul E. McKenney July 13, 2009, 7:31 p.m. UTC | #15
On Mon, Jul 13, 2009 at 04:40:59PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> > Yeah I understand that other RCU read section may introduce delays too.
> > The question is how big the delay may be.
> 
> I recall seeing the number "at least 3 jiffies" somewhere, but that
> might have changed since.

A grace period lasts a handful of jiffies, depending on kernel
configuration and how long readers remain in a given RCU read-side
critical section.

If a handful of jiffies is too long, there are patches that speed up
the grace period, down into the sub-hundred-microsecond range.

> > I don't think multiple
> > milliseconds delay in device de-assignment is a big issue though.
> 
> Right. My point was that since the sync is done under kvm lock, the
> guest can easily get blocked trying to get kvm lock meanwhile.

I will ask the usual question -- can call_rcu() be used to move
the grace period out from under the lock?  (Often this can be done,
but not always.)
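
For reference, the usual call_rcu() shape looks roughly like the sketch
below. It assumes the object can carry an rcu_head and be freed by the
callback (names here are illustrative; as the reply below notes, that is
not how these notifier structures are owned):

struct foo {
        struct hlist_node link;
        struct rcu_head rcu;    /* would have to be added to the structure */
};

static void foo_free_rcu(struct rcu_head *head)
{
        struct foo *f = container_of(head, struct foo, rcu);

        kfree(f);
}

static void foo_unregister(struct kvm *kvm, struct foo *f)
{
        mutex_lock(&kvm->irq_lock);
        hlist_del_init_rcu(&f->link);
        mutex_unlock(&kvm->irq_lock);
        call_rcu(&f->rcu, foo_free_rcu);        /* freed after a grace period,
                                                 * without blocking the caller */
}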

							Thanx, Paul
Gleb Natapov July 14, 2009, 5:46 a.m. UTC | #16
On Mon, Jul 13, 2009 at 12:31:39PM -0700, Paul E. McKenney wrote:
> On Mon, Jul 13, 2009 at 04:40:59PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> > > Yeah I understand that other RCU read section may introduce delays too.
> > > The question is how big the delay may be.
> > 
> > I recall seeing the number "at least 3 jiffies" somewhere, but that
> > might have changed since.
> 
> A grace period lasts a handful of jiffies, depending on kernel
> configuration and how long readers remain in a given RCU read-side
> critical section.
> 
> If a handful of jiffies is too long, there are patches that speed up
> the grace period, down into the sub-hundred-microsecond range.
> 
> > > I don't think multiple
> > > milliseconds delay in device de-assignment is a big issue though.
> > 
> > Right. My point was that since the sync is done under kvm lock, the
> > guest can easily get blocked trying to get kvm lock meanwhile.
> 
> I will ask the usual question -- can call_rcu() be used to move
> the grace period out from under the lock?  (Often this can be done,
> but not always.)
> 
It can't. The caller frees the data.

--
			Gleb.
Paul E. McKenney July 14, 2009, 12:03 p.m. UTC | #17
On Tue, Jul 14, 2009 at 08:46:12AM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 12:31:39PM -0700, Paul E. McKenney wrote:
> > On Mon, Jul 13, 2009 at 04:40:59PM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> > > > Yeah I understand that other RCU read section may introduce delays too.
> > > > The question is how big the delay may be.
> > > 
> > > I recall seeing the number "at least 3 jiffies" somewhere, but that
> > > might have changed since.
> > 
> > A grace period lasts a handful of jiffies, depending on kernel
> > configuration and how long readers remain in a given RCU read-side
> > critical section.
> > 
> > If a handful of jiffies is too long, there are patches that speed up
> > the grace period, down into the sub-hundred-microsecond range.
> > 
> > > > I don't think multiple
> > > > milliseconds delay in device de-assignment is a big issue though.
> > > 
> > > Right. My point was that since the sync is done under kvm lock, the
> > > guest can easily get blocked trying to get kvm lock meanwhile.
> > 
> > I will ask the usual question -- can call_rcu() be used to move
> > the grace period out from under the lock?  (Often this can be done,
> > but not always.)
> > 
> It can't. Caller frees the data.

I will then ask the usual next question...  Can the freeing of the
data be moved from the caller?

						Thanx, Paul
Gleb Natapov July 14, 2009, 12:06 p.m. UTC | #18
On Tue, Jul 14, 2009 at 05:03:09AM -0700, Paul E. McKenney wrote:
> On Tue, Jul 14, 2009 at 08:46:12AM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 12:31:39PM -0700, Paul E. McKenney wrote:
> > > On Mon, Jul 13, 2009 at 04:40:59PM +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> > > > > Yeah I understand that other RCU read section may introduce delays too.
> > > > > The question is how big the delay may be.
> > > > 
> > > > I recall seeing the number "at least 3 jiffies" somewhere, but that
> > > > might have changed since.
> > > 
> > > A grace period lasts a handful of jiffies, depending on kernel
> > > configuration and how long readers remain in a given RCU read-side
> > > critical section.
> > > 
> > > If a handful of jiffies is too long, there are patches that speed up
> > > the grace period, down into the sub-hundred-microsecond range.
> > > 
> > > > > I don't think multiple
> > > > > milliseconds delay in device de-assignment is a big issue though.
> > > > 
> > > > Right. My point was that since the sync is done under kvm lock, the
> > > > guest can easily get blocked trying to get kvm lock meanwhile.
> > > 
> > > I will ask the usual question -- can call_rcu() be used to move
> > > the grace period out from under the lock?  (Often this can be done,
> > > but not always.)
> > > 
> > It can't. Caller frees the data.
> 
> I will then ask the usual next question...  Can the freeing of the
> data be moved from the caller?
> 
Not really. But don't worry. In the case we are discussing, a small delay
is not a problem.

--
			Gleb.
diff mbox

Patch

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 5dde1ef..ba3a115 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -179,18 +179,18 @@  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 			break;
 		}
 	}
-	rcu_read_unlock();
 
-	hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
+	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
 		if (kian->gsi == gsi)
 			kian->irq_acked(kian);
+	rcu_read_unlock();
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
@@ -198,8 +198,9 @@  void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 				    struct kvm_irq_ack_notifier *kian)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_del_init(&kian->link);
+	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
 }
 
 int kvm_request_irq_source_id(struct kvm *kvm)
@@ -246,7 +247,7 @@  void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 {
 	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
-	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
@@ -254,8 +255,9 @@  void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
 	mutex_lock(&kvm->irq_lock);
-	hlist_del(&kimn->link);
+	hlist_del_rcu(&kimn->link);
 	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -263,11 +265,11 @@  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
-	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
-	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
+	rcu_read_unlock();
 }
 
 void kvm_free_irq_routing(struct kvm *kvm)