diff mbox

[1/4] Move irq routing data structure to rcu locking

Message ID 1247400233-24243-2-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov July 12, 2009, 12:03 p.m. UTC
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 include/linux/kvm_host.h |    2 +-
 virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
 virt/kvm/kvm_main.c      |    1 -
 3 files changed, 26 insertions(+), 32 deletions(-)

Comments

Michael S. Tsirkin July 13, 2009, 12:55 p.m. UTC | #1
On Sun, Jul 12, 2009 at 03:03:50PM +0300, Gleb Natapov wrote:
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  include/linux/kvm_host.h |    2 +-
>  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
>  virt/kvm/kvm_main.c      |    1 -
>  3 files changed, 26 insertions(+), 32 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f54a0d3..6756b3e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -161,7 +161,7 @@ struct kvm {
>  
>  	struct mutex irq_lock;
>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> +	struct kvm_kernel_irq_routing_entry *irq_routing;
>  	struct hlist_head mask_notifier_list;
>  #endif
>  
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 7af18b8..b2fa3f6 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>  	 * IOAPIC.  So set the bit in both. The guest will ignore
>  	 * writes to the unused one.
>  	 */
> -	list_for_each_entry(e, &kvm->irq_routing, link)
> +	rcu_read_lock();
> +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>  		if (e->gsi == irq) {
>  			int r = e->set(e, kvm, sig_level);
>  			if (r < 0)
> @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>  
>  			ret = r + ((ret < 0) ? 0 : ret);
>  		}
> +	}
> +	rcu_read_unlock();
>  	return ret;
>  }
>  
> @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>  
>  	trace_kvm_ack_irq(irqchip, pin);
>  
> -	list_for_each_entry(e, &kvm->irq_routing, link)
> +	rcu_read_lock();
> +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>  		if (e->irqchip.irqchip == irqchip &&
>  		    e->irqchip.pin == pin) {
>  			gsi = e->gsi;
>  			break;
>  		}
> +	}
> +	rcu_read_unlock();
>  
>  	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
>  		if (kian->gsi == gsi)
> @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>  			kimn->func(kimn, mask);
>  }
>  
> -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> -{
> -	struct kvm_kernel_irq_routing_entry *e, *n;
> -
> -	list_for_each_entry_safe(e, n, irq_routing, link)
> -		kfree(e);
> -}
> -
>  void kvm_free_irq_routing(struct kvm *kvm)
>  {
> -	mutex_lock(&kvm->irq_lock);
> -	__kvm_free_irq_routing(&kvm->irq_routing);
> -	mutex_unlock(&kvm->irq_lock);
> +	/* Called only during vm destruction. Nobody can use the pointer
> +	   at this stage */
> +	kfree(kvm->irq_routing);
>  }
>  
>  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
>  			unsigned nr,
>  			unsigned flags)
>  {
> -	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> -	struct list_head tmp = LIST_HEAD_INIT(tmp);
> -	struct kvm_kernel_irq_routing_entry *e = NULL;
> +	struct kvm_kernel_irq_routing_entry *new, *old;
>  	unsigned i;
>  	int r;
>  
> +	/* last element is left zeroed and indicates the end of the array */
> +	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);

There are up to 1K entries, and each one seems to be around 32 bytes.
Is there a chance that we won't be able to find such a chunk of
contiguous memory on a busy system?

Since the tmp list is never traversed while it is assigned, we can, instead,
build a new list as we did and simply replace list_splice with these bits from
list_splice_init_rcu:

/*
 * Splice the entries of the list headed by @tmp onto the front of the
 * list headed by @head, publishing the new first entry with
 * rcu_assign_pointer().
 *
 * @tmp:  head of the list whose entries are inserted; @tmp itself is not
 *        modified or reinitialised here.
 * @head: head of the destination list; the spliced entries end up between
 *        @head and its previous first entry.
 *
 * NOTE(review): there is no empty-list check, so the caller presumably
 * guarantees that @tmp holds at least one entry -- confirm. No grace
 * period is waited for here either.
 */
static inline void list_splice_tmp_rcu(struct list_head *tmp,
				       struct list_head *head) {
	struct list_head *first = tmp->next;	/* first entry being inserted */
	struct list_head *last = tmp->prev;	/* last entry being inserted */
	struct list_head *at = head->next;	/* old first entry of @head */
	/* Chain the inserted run to the old entries before publishing it. */
	last->next = at;
	/* Make the inserted run reachable from @head->next. */
	rcu_assign_pointer(head->next, first);
	/* Fix up the back pointers after the new entries are published. */
	first->prev = head;
	at->prev = last;
}


> +
> +	if (!new)
> +		return -ENOMEM;
> +
>  	for (i = 0; i < nr; ++i) {
>  		r = -EINVAL;
>  		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
>  			goto out;
>  		if (ue->flags)
>  			goto out;
> -		r = -ENOMEM;
> -		e = kzalloc(sizeof(*e), GFP_KERNEL);
> -		if (!e)
> -			goto out;
> -		r = setup_routing_entry(e, ue);
> +		r = setup_routing_entry(new + i, ue);
>  		if (r)
>  			goto out;
>  		++ue;
> -		list_add(&e->link, &irq_list);
> -		e = NULL;
>  	}
>  
>  	mutex_lock(&kvm->irq_lock);
> -	list_splice(&kvm->irq_routing, &tmp);
> -	INIT_LIST_HEAD(&kvm->irq_routing);
> -	list_splice(&irq_list, &kvm->irq_routing);
> -	INIT_LIST_HEAD(&irq_list);
> -	list_splice(&tmp, &irq_list);
> +	old = kvm->irq_routing;
> +	rcu_assign_pointer(kvm->irq_routing, new);
>  	mutex_unlock(&kvm->irq_lock);
>  
> +	synchronize_rcu();
> +
>  	r = 0;
> +	new = old;
>  
>  out:
> -	kfree(e);
> -	__kvm_free_irq_routing(&irq_list);
> +	kfree(new);
>  	return r;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index cf20dc1..24013b4 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
>  	if (IS_ERR(kvm))
>  		goto out;
>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> -	INIT_LIST_HEAD(&kvm->irq_routing);
>  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
>  #endif
>  
> -- 
> 1.6.2.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gregory Haskins July 13, 2009, 1:01 p.m. UTC | #2
Gleb Natapov wrote:
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  include/linux/kvm_host.h |    2 +-
>  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
>  virt/kvm/kvm_main.c      |    1 -
>  3 files changed, 26 insertions(+), 32 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f54a0d3..6756b3e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -161,7 +161,7 @@ struct kvm {
>  
>  	struct mutex irq_lock;
>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> +	struct kvm_kernel_irq_routing_entry *irq_routing;
>  	struct hlist_head mask_notifier_list;
>  #endif
>  
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 7af18b8..b2fa3f6 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>  	 * IOAPIC.  So set the bit in both. The guest will ignore
>  	 * writes to the unused one.
>  	 */
> -	list_for_each_entry(e, &kvm->irq_routing, link)
> +	rcu_read_lock();
> +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>   

Hi Gleb,
  I haven't had a chance to fully digest and review these patches, but
one thing I did notice is that you seem to be converting from a list to
an open-coded structure.  I am just curious why you made this design
decision instead of using the RCU variant of list?

Regards,
-Greg

>  		if (e->gsi == irq) {
>  			int r = e->set(e, kvm, sig_level);
>  			if (r < 0)
> @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>  
>  			ret = r + ((ret < 0) ? 0 : ret);
>  		}
> +	}
> +	rcu_read_unlock();
>  	return ret;
>  }
>  
> @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>  
>  	trace_kvm_ack_irq(irqchip, pin);
>  
> -	list_for_each_entry(e, &kvm->irq_routing, link)
> +	rcu_read_lock();
> +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>  		if (e->irqchip.irqchip == irqchip &&
>  		    e->irqchip.pin == pin) {
>  			gsi = e->gsi;
>  			break;
>  		}
> +	}
> +	rcu_read_unlock();
>  
>  	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
>  		if (kian->gsi == gsi)
> @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>  			kimn->func(kimn, mask);
>  }
>  
> -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> -{
> -	struct kvm_kernel_irq_routing_entry *e, *n;
> -
> -	list_for_each_entry_safe(e, n, irq_routing, link)
> -		kfree(e);
> -}
> -
>  void kvm_free_irq_routing(struct kvm *kvm)
>  {
> -	mutex_lock(&kvm->irq_lock);
> -	__kvm_free_irq_routing(&kvm->irq_routing);
> -	mutex_unlock(&kvm->irq_lock);
> +	/* Called only during vm destruction. Nobody can use the pointer
> +	   at this stage */
> +	kfree(kvm->irq_routing);
>  }
>  
>  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
>  			unsigned nr,
>  			unsigned flags)
>  {
> -	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> -	struct list_head tmp = LIST_HEAD_INIT(tmp);
> -	struct kvm_kernel_irq_routing_entry *e = NULL;
> +	struct kvm_kernel_irq_routing_entry *new, *old;
>  	unsigned i;
>  	int r;
>  
> +	/* last element is left zeroed and indicates the end of the array */
> +	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> +
> +	if (!new)
> +		return -ENOMEM;
> +
>  	for (i = 0; i < nr; ++i) {
>  		r = -EINVAL;
>  		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
>  			goto out;
>  		if (ue->flags)
>  			goto out;
> -		r = -ENOMEM;
> -		e = kzalloc(sizeof(*e), GFP_KERNEL);
> -		if (!e)
> -			goto out;
> -		r = setup_routing_entry(e, ue);
> +		r = setup_routing_entry(new + i, ue);
>  		if (r)
>  			goto out;
>  		++ue;
> -		list_add(&e->link, &irq_list);
> -		e = NULL;
>  	}
>  
>  	mutex_lock(&kvm->irq_lock);
> -	list_splice(&kvm->irq_routing, &tmp);
> -	INIT_LIST_HEAD(&kvm->irq_routing);
> -	list_splice(&irq_list, &kvm->irq_routing);
> -	INIT_LIST_HEAD(&irq_list);
> -	list_splice(&tmp, &irq_list);
> +	old = kvm->irq_routing;
> +	rcu_assign_pointer(kvm->irq_routing, new);
>  	mutex_unlock(&kvm->irq_lock);
>  
> +	synchronize_rcu();
> +
>  	r = 0;
> +	new = old;
>  
>  out:
> -	kfree(e);
> -	__kvm_free_irq_routing(&irq_list);
> +	kfree(new);
>  	return r;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index cf20dc1..24013b4 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
>  	if (IS_ERR(kvm))
>  		goto out;
>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> -	INIT_LIST_HEAD(&kvm->irq_routing);
>  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
>  #endif
>  
>
Gleb Natapov July 13, 2009, 1:03 p.m. UTC | #3
On Mon, Jul 13, 2009 at 03:55:07PM +0300, Michael S. Tsirkin wrote:
> On Sun, Jul 12, 2009 at 03:03:50PM +0300, Gleb Natapov wrote:
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  include/linux/kvm_host.h |    2 +-
> >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> >  virt/kvm/kvm_main.c      |    1 -
> >  3 files changed, 26 insertions(+), 32 deletions(-)
> > 
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index f54a0d3..6756b3e 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -161,7 +161,7 @@ struct kvm {
> >  
> >  	struct mutex irq_lock;
> >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> >  	struct hlist_head mask_notifier_list;
> >  #endif
> >  
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 7af18b8..b2fa3f6 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> >  	 * writes to the unused one.
> >  	 */
> > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > +	rcu_read_lock();
> > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> >  		if (e->gsi == irq) {
> >  			int r = e->set(e, kvm, sig_level);
> >  			if (r < 0)
> > @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >  
> >  			ret = r + ((ret < 0) ? 0 : ret);
> >  		}
> > +	}
> > +	rcu_read_unlock();
> >  	return ret;
> >  }
> >  
> > @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >  
> >  	trace_kvm_ack_irq(irqchip, pin);
> >  
> > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > +	rcu_read_lock();
> > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> >  		if (e->irqchip.irqchip == irqchip &&
> >  		    e->irqchip.pin == pin) {
> >  			gsi = e->gsi;
> >  			break;
> >  		}
> > +	}
> > +	rcu_read_unlock();
> >  
> >  	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> >  		if (kian->gsi == gsi)
> > @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >  			kimn->func(kimn, mask);
> >  }
> >  
> > -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> > -{
> > -	struct kvm_kernel_irq_routing_entry *e, *n;
> > -
> > -	list_for_each_entry_safe(e, n, irq_routing, link)
> > -		kfree(e);
> > -}
> > -
> >  void kvm_free_irq_routing(struct kvm *kvm)
> >  {
> > -	mutex_lock(&kvm->irq_lock);
> > -	__kvm_free_irq_routing(&kvm->irq_routing);
> > -	mutex_unlock(&kvm->irq_lock);
> > +	/* Called only during vm destruction. Nobody can use the pointer
> > +	   at this stage */
> > +	kfree(kvm->irq_routing);
> >  }
> >  
> >  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> >  			unsigned nr,
> >  			unsigned flags)
> >  {
> > -	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> > -	struct list_head tmp = LIST_HEAD_INIT(tmp);
> > -	struct kvm_kernel_irq_routing_entry *e = NULL;
> > +	struct kvm_kernel_irq_routing_entry *new, *old;
> >  	unsigned i;
> >  	int r;
> >  
> > +	/* last element is left zeroed and indicates the end of the array */
> > +	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> 
> There are up to 1K entries, and each one seems to be around 32 bytes.
> Are there chances that we won't be able to find such a chunk of
> contiguous memory on a busy system?  
> 
> Since the tmp list is never traversed while it is assigned, we can, instead,
> build a new list as we did and simply replace list_splice with these bits from
> list_splice_init_rcu:
> 
> static inline void list_splice_tmp_rcu(struct list_head *tmp,
> 				       struct list_head *head) {
> 	struct list_head *first = tmp->next;
> 	struct list_head *last = tmp->prev;
> 	struct list_head *at = head->next;
> 	last->next = at;
> 	rcu_assign_pointer(head->next, first);
> 	first->prev = head;
> 	at->prev = last;
> }
> 
Let's keep simple things simple. If there is a real concern that 3-4
contiguous pages will not be available, we can use vmalloc() here. But the
not-so-long-term plan is to not use the irq routing table for MSI injection
(new ioctl kvm_msi_inject) and to reduce the table to a much smaller size
(maybe make it a hash).

> 
> > +
> > +	if (!new)
> > +		return -ENOMEM;
> > +
> >  	for (i = 0; i < nr; ++i) {
> >  		r = -EINVAL;
> >  		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> >  			goto out;
> >  		if (ue->flags)
> >  			goto out;
> > -		r = -ENOMEM;
> > -		e = kzalloc(sizeof(*e), GFP_KERNEL);
> > -		if (!e)
> > -			goto out;
> > -		r = setup_routing_entry(e, ue);
> > +		r = setup_routing_entry(new + i, ue);
> >  		if (r)
> >  			goto out;
> >  		++ue;
> > -		list_add(&e->link, &irq_list);
> > -		e = NULL;
> >  	}
> >  
> >  	mutex_lock(&kvm->irq_lock);
> > -	list_splice(&kvm->irq_routing, &tmp);
> > -	INIT_LIST_HEAD(&kvm->irq_routing);
> > -	list_splice(&irq_list, &kvm->irq_routing);
> > -	INIT_LIST_HEAD(&irq_list);
> > -	list_splice(&tmp, &irq_list);
> > +	old = kvm->irq_routing;
> > +	rcu_assign_pointer(kvm->irq_routing, new);
> >  	mutex_unlock(&kvm->irq_lock);
> >  
> > +	synchronize_rcu();
> > +
> >  	r = 0;
> > +	new = old;
> >  
> >  out:
> > -	kfree(e);
> > -	__kvm_free_irq_routing(&irq_list);
> > +	kfree(new);
> >  	return r;
> >  }
> >  
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index cf20dc1..24013b4 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> >  	if (IS_ERR(kvm))
> >  		goto out;
> >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > -	INIT_LIST_HEAD(&kvm->irq_routing);
> >  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> >  #endif
> >  
> > -- 
> > 1.6.2.1
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin July 13, 2009, 1:15 p.m. UTC | #4
On Mon, Jul 13, 2009 at 04:03:10PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 03:55:07PM +0300, Michael S. Tsirkin wrote:
> > On Sun, Jul 12, 2009 at 03:03:50PM +0300, Gleb Natapov wrote:
> > > 
> > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > ---
> > >  include/linux/kvm_host.h |    2 +-
> > >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> > >  virt/kvm/kvm_main.c      |    1 -
> > >  3 files changed, 26 insertions(+), 32 deletions(-)
> > > 
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index f54a0d3..6756b3e 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -161,7 +161,7 @@ struct kvm {
> > >  
> > >  	struct mutex irq_lock;
> > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> > >  	struct hlist_head mask_notifier_list;
> > >  #endif
> > >  
> > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > index 7af18b8..b2fa3f6 100644
> > > --- a/virt/kvm/irq_comm.c
> > > +++ b/virt/kvm/irq_comm.c
> > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> > >  	 * writes to the unused one.
> > >  	 */
> > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > +	rcu_read_lock();
> > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > >  		if (e->gsi == irq) {
> > >  			int r = e->set(e, kvm, sig_level);
> > >  			if (r < 0)
> > > @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > >  
> > >  			ret = r + ((ret < 0) ? 0 : ret);
> > >  		}
> > > +	}
> > > +	rcu_read_unlock();
> > >  	return ret;
> > >  }
> > >  
> > > @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > >  
> > >  	trace_kvm_ack_irq(irqchip, pin);
> > >  
> > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > +	rcu_read_lock();
> > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > >  		if (e->irqchip.irqchip == irqchip &&
> > >  		    e->irqchip.pin == pin) {
> > >  			gsi = e->gsi;
> > >  			break;
> > >  		}
> > > +	}
> > > +	rcu_read_unlock();
> > >  
> > >  	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> > >  		if (kian->gsi == gsi)
> > > @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > >  			kimn->func(kimn, mask);
> > >  }
> > >  
> > > -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> > > -{
> > > -	struct kvm_kernel_irq_routing_entry *e, *n;
> > > -
> > > -	list_for_each_entry_safe(e, n, irq_routing, link)
> > > -		kfree(e);
> > > -}
> > > -
> > >  void kvm_free_irq_routing(struct kvm *kvm)
> > >  {
> > > -	mutex_lock(&kvm->irq_lock);
> > > -	__kvm_free_irq_routing(&kvm->irq_routing);
> > > -	mutex_unlock(&kvm->irq_lock);
> > > +	/* Called only during vm destruction. Nobody can use the pointer
> > > +	   at this stage */
> > > +	kfree(kvm->irq_routing);
> > >  }
> > >  
> > >  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > > @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > >  			unsigned nr,
> > >  			unsigned flags)
> > >  {
> > > -	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> > > -	struct list_head tmp = LIST_HEAD_INIT(tmp);
> > > -	struct kvm_kernel_irq_routing_entry *e = NULL;
> > > +	struct kvm_kernel_irq_routing_entry *new, *old;
> > >  	unsigned i;
> > >  	int r;
> > >  
> > > +	/* last element is left zeroed and indicates the end of the array */
> > > +	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > 
> > There are up to 1K entries, and each one seems to be around 32 bytes.
> > Are there chances that we won't be able to find such a chunk of
> > contiguous memory on a busy system?  
> > 
> > Since the tmp list is never traversed while it is assigned, we can, instead,
> > build a new list as we did and simply replace list_splice with these bits from
> > list_splice_init_rcu:
> > 
> > static inline void list_splice_tmp_rcu(struct list_head *tmp,
> > 				       struct list_head *head) {
> > 	struct list_head *first = tmp->next;
> > 	struct list_head *last = tmp->prev;
> > 	struct list_head *at = head->next;
> > 	last->next = at;
> > 	rcu_assign_pointer(head->next, first);
> > 	first->prev = head;
> > 	at->prev = last;
> > }
> > 
> Lets keep simple things simple. If there is real concern that 3-4
> contiguous page will not be available we can use vmalloc() here.

Hmm, 32 * (1K + 1) is usually 8-9 pages, and vmalloc is a finite
resource. Maybe it's a good idea to use an array instead of a list. All
I'm saying is that RCU does not force you to do this.

> But the
> not so long term plan is to not use irq routing table for msi injection
> (new ioctl kvm_msi_inject) and reduce table to much smaller size (may be
> make it hash).

Why bother with an array as an intermediate step then?

> > 
> > > +
> > > +	if (!new)
> > > +		return -ENOMEM;
> > > +
> > >  	for (i = 0; i < nr; ++i) {
> > >  		r = -EINVAL;
> > >  		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> > >  			goto out;
> > >  		if (ue->flags)
> > >  			goto out;
> > > -		r = -ENOMEM;
> > > -		e = kzalloc(sizeof(*e), GFP_KERNEL);
> > > -		if (!e)
> > > -			goto out;
> > > -		r = setup_routing_entry(e, ue);
> > > +		r = setup_routing_entry(new + i, ue);
> > >  		if (r)
> > >  			goto out;
> > >  		++ue;
> > > -		list_add(&e->link, &irq_list);
> > > -		e = NULL;
> > >  	}
> > >  
> > >  	mutex_lock(&kvm->irq_lock);
> > > -	list_splice(&kvm->irq_routing, &tmp);
> > > -	INIT_LIST_HEAD(&kvm->irq_routing);
> > > -	list_splice(&irq_list, &kvm->irq_routing);
> > > -	INIT_LIST_HEAD(&irq_list);
> > > -	list_splice(&tmp, &irq_list);
> > > +	old = kvm->irq_routing;
> > > +	rcu_assign_pointer(kvm->irq_routing, new);
> > >  	mutex_unlock(&kvm->irq_lock);
> > >  
> > > +	synchronize_rcu();
> > > +
> > >  	r = 0;
> > > +	new = old;
> > >  
> > >  out:
> > > -	kfree(e);
> > > -	__kvm_free_irq_routing(&irq_list);
> > > +	kfree(new);
> > >  	return r;
> > >  }
> > >  
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index cf20dc1..24013b4 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> > >  	if (IS_ERR(kvm))
> > >  		goto out;
> > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > -	INIT_LIST_HEAD(&kvm->irq_routing);
> > >  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> > >  #endif
> > >  
> > > -- 
> > > 1.6.2.1
> > > 
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 13, 2009, 1:15 p.m. UTC | #5
On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  include/linux/kvm_host.h |    2 +-
> >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> >  virt/kvm/kvm_main.c      |    1 -
> >  3 files changed, 26 insertions(+), 32 deletions(-)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index f54a0d3..6756b3e 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -161,7 +161,7 @@ struct kvm {
> >  
> >  	struct mutex irq_lock;
> >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> >  	struct hlist_head mask_notifier_list;
> >  #endif
> >  
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 7af18b8..b2fa3f6 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> >  	 * writes to the unused one.
> >  	 */
> > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > +	rcu_read_lock();
> > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> >   
> 
> Hi Gleb,
>   I haven't had a chance to fully digest and review these patches, but
> one thing I did notice is that you seem to be converting from a list to
> an open-coded structure.  I am just curious why you made this design
> decision instead of using the RCU variant of list?
> 
It is not a scary "open-coded structure", it's just an array :) As I responded
to Michael, the idea is to move MSIs out of irq_routing, make the array
much smaller and either use gsi as an index into the array or use a hash table
instead of looping over all entries. For now I can justify the array as a more
cache-friendly data structure, as we scan it linearly.

> Regards,
> -Greg
> 
> >  		if (e->gsi == irq) {
> >  			int r = e->set(e, kvm, sig_level);
> >  			if (r < 0)
> > @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >  
> >  			ret = r + ((ret < 0) ? 0 : ret);
> >  		}
> > +	}
> > +	rcu_read_unlock();
> >  	return ret;
> >  }
> >  
> > @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >  
> >  	trace_kvm_ack_irq(irqchip, pin);
> >  
> > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > +	rcu_read_lock();
> > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> >  		if (e->irqchip.irqchip == irqchip &&
> >  		    e->irqchip.pin == pin) {
> >  			gsi = e->gsi;
> >  			break;
> >  		}
> > +	}
> > +	rcu_read_unlock();
> >  
> >  	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> >  		if (kian->gsi == gsi)
> > @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >  			kimn->func(kimn, mask);
> >  }
> >  
> > -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> > -{
> > -	struct kvm_kernel_irq_routing_entry *e, *n;
> > -
> > -	list_for_each_entry_safe(e, n, irq_routing, link)
> > -		kfree(e);
> > -}
> > -
> >  void kvm_free_irq_routing(struct kvm *kvm)
> >  {
> > -	mutex_lock(&kvm->irq_lock);
> > -	__kvm_free_irq_routing(&kvm->irq_routing);
> > -	mutex_unlock(&kvm->irq_lock);
> > +	/* Called only during vm destruction. Nobody can use the pointer
> > +	   at this stage */
> > +	kfree(kvm->irq_routing);
> >  }
> >  
> >  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> >  			unsigned nr,
> >  			unsigned flags)
> >  {
> > -	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> > -	struct list_head tmp = LIST_HEAD_INIT(tmp);
> > -	struct kvm_kernel_irq_routing_entry *e = NULL;
> > +	struct kvm_kernel_irq_routing_entry *new, *old;
> >  	unsigned i;
> >  	int r;
> >  
> > +	/* last element is left zeroed and indicates the end of the array */
> > +	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > +
> > +	if (!new)
> > +		return -ENOMEM;
> > +
> >  	for (i = 0; i < nr; ++i) {
> >  		r = -EINVAL;
> >  		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> >  			goto out;
> >  		if (ue->flags)
> >  			goto out;
> > -		r = -ENOMEM;
> > -		e = kzalloc(sizeof(*e), GFP_KERNEL);
> > -		if (!e)
> > -			goto out;
> > -		r = setup_routing_entry(e, ue);
> > +		r = setup_routing_entry(new + i, ue);
> >  		if (r)
> >  			goto out;
> >  		++ue;
> > -		list_add(&e->link, &irq_list);
> > -		e = NULL;
> >  	}
> >  
> >  	mutex_lock(&kvm->irq_lock);
> > -	list_splice(&kvm->irq_routing, &tmp);
> > -	INIT_LIST_HEAD(&kvm->irq_routing);
> > -	list_splice(&irq_list, &kvm->irq_routing);
> > -	INIT_LIST_HEAD(&irq_list);
> > -	list_splice(&tmp, &irq_list);
> > +	old = kvm->irq_routing;
> > +	rcu_assign_pointer(kvm->irq_routing, new);
> >  	mutex_unlock(&kvm->irq_lock);
> >  
> > +	synchronize_rcu();
> > +
> >  	r = 0;
> > +	new = old;
> >  
> >  out:
> > -	kfree(e);
> > -	__kvm_free_irq_routing(&irq_list);
> > +	kfree(new);
> >  	return r;
> >  }
> >  
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index cf20dc1..24013b4 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> >  	if (IS_ERR(kvm))
> >  		goto out;
> >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > -	INIT_LIST_HEAD(&kvm->irq_routing);
> >  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> >  #endif
> >  
> >   
> 
> 



--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gregory Haskins July 13, 2009, 1:16 p.m. UTC | #6
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
>   
>> Gleb Natapov wrote:
>>     
>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>> ---
>>>  include/linux/kvm_host.h |    2 +-
>>>  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
>>>  virt/kvm/kvm_main.c      |    1 -
>>>  3 files changed, 26 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index f54a0d3..6756b3e 100644
>>> --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -161,7 +161,7 @@ struct kvm {
>>>  
>>>  	struct mutex irq_lock;
>>>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
>>> -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
>>> +	struct kvm_kernel_irq_routing_entry *irq_routing;
>>>  	struct hlist_head mask_notifier_list;
>>>  #endif
>>>  
>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>> index 7af18b8..b2fa3f6 100644
>>> --- a/virt/kvm/irq_comm.c
>>> +++ b/virt/kvm/irq_comm.c
>>> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>>>  	 * IOAPIC.  So set the bit in both. The guest will ignore
>>>  	 * writes to the unused one.
>>>  	 */
>>> -	list_for_each_entry(e, &kvm->irq_routing, link)
>>> +	rcu_read_lock();
>>> +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>>>   
>>>       
>> Hi Gleb,
>>   I haven't had a chance to fully digest and review these patches, but
>> one thing I did notice is that you seem to be converting from a list to
>> an open-coded structure.  I am just curious why you made this design
>> decision instead of using the RCU variant of list?
>>
>>     
> It is not scary "open-coded structure" it's just an array :) As I responded
> to Michael the idea is to move msis out of irq_routing, make the array
> much smaller and either use gsi as an index in the array or use hash table
> instead looping over all entries. For now I can justify array as more
> cache friendly data structure as we scan it linearly.
>   

Ok, but that might be a good thing to mention in the patch header ;)

Kind Regards,
-Greg

>   
>> Regards,
>> -Greg
>>
>>     
>>>  		if (e->gsi == irq) {
>>>  			int r = e->set(e, kvm, sig_level);
>>>  			if (r < 0)
>>> @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>>>  
>>>  			ret = r + ((ret < 0) ? 0 : ret);
>>>  		}
>>> +	}
>>> +	rcu_read_unlock();
>>>  	return ret;
>>>  }
>>>  
>>> @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>>  
>>>  	trace_kvm_ack_irq(irqchip, pin);
>>>  
>>> -	list_for_each_entry(e, &kvm->irq_routing, link)
>>> +	rcu_read_lock();
>>> +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>>>  		if (e->irqchip.irqchip == irqchip &&
>>>  		    e->irqchip.pin == pin) {
>>>  			gsi = e->gsi;
>>>  			break;
>>>  		}
>>> +	}
>>> +	rcu_read_unlock();
>>>  
>>>  	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
>>>  		if (kian->gsi == gsi)
>>> @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>>>  			kimn->func(kimn, mask);
>>>  }
>>>  
>>> -static void __kvm_free_irq_routing(struct list_head *irq_routing)
>>> -{
>>> -	struct kvm_kernel_irq_routing_entry *e, *n;
>>> -
>>> -	list_for_each_entry_safe(e, n, irq_routing, link)
>>> -		kfree(e);
>>> -}
>>> -
>>>  void kvm_free_irq_routing(struct kvm *kvm)
>>>  {
>>> -	mutex_lock(&kvm->irq_lock);
>>> -	__kvm_free_irq_routing(&kvm->irq_routing);
>>> -	mutex_unlock(&kvm->irq_lock);
>>> +	/* Called only during vm destruction. Nobody can use the pointer
>>> +	   at this stage */
>>> +	kfree(kvm->irq_routing);
>>>  }
>>>  
>>>  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
>>> @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
>>>  			unsigned nr,
>>>  			unsigned flags)
>>>  {
>>> -	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
>>> -	struct list_head tmp = LIST_HEAD_INIT(tmp);
>>> -	struct kvm_kernel_irq_routing_entry *e = NULL;
>>> +	struct kvm_kernel_irq_routing_entry *new, *old;
>>>  	unsigned i;
>>>  	int r;
>>>  
>>> +	/* last element is left zeroed and indicates the end of the array */
>>> +	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
>>> +
>>> +	if (!new)
>>> +		return -ENOMEM;
>>> +
>>>  	for (i = 0; i < nr; ++i) {
>>>  		r = -EINVAL;
>>>  		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
>>>  			goto out;
>>>  		if (ue->flags)
>>>  			goto out;
>>> -		r = -ENOMEM;
>>> -		e = kzalloc(sizeof(*e), GFP_KERNEL);
>>> -		if (!e)
>>> -			goto out;
>>> -		r = setup_routing_entry(e, ue);
>>> +		r = setup_routing_entry(new + i, ue);
>>>  		if (r)
>>>  			goto out;
>>>  		++ue;
>>> -		list_add(&e->link, &irq_list);
>>> -		e = NULL;
>>>  	}
>>>  
>>>  	mutex_lock(&kvm->irq_lock);
>>> -	list_splice(&kvm->irq_routing, &tmp);
>>> -	INIT_LIST_HEAD(&kvm->irq_routing);
>>> -	list_splice(&irq_list, &kvm->irq_routing);
>>> -	INIT_LIST_HEAD(&irq_list);
>>> -	list_splice(&tmp, &irq_list);
>>> +	old = kvm->irq_routing;
>>> +	rcu_assign_pointer(kvm->irq_routing, new);
>>>  	mutex_unlock(&kvm->irq_lock);
>>>  
>>> +	synchronize_rcu();
>>> +
>>>  	r = 0;
>>> +	new = old;
>>>  
>>>  out:
>>> -	kfree(e);
>>> -	__kvm_free_irq_routing(&irq_list);
>>> +	kfree(new);
>>>  	return r;
>>>  }
>>>  
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>> index cf20dc1..24013b4 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
>>>  	if (IS_ERR(kvm))
>>>  		goto out;
>>>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
>>> -	INIT_LIST_HEAD(&kvm->irq_routing);
>>>  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
>>>  #endif
>>>  
>>>   
>>>       
>>     
>
>
>
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Gleb Natapov July 13, 2009, 1:23 p.m. UTC | #7
On Mon, Jul 13, 2009 at 04:15:30PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jul 13, 2009 at 04:03:10PM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 03:55:07PM +0300, Michael S. Tsirkin wrote:
> > > On Sun, Jul 12, 2009 at 03:03:50PM +0300, Gleb Natapov wrote:
> > > > 
> > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > ---
> > > >  include/linux/kvm_host.h |    2 +-
> > > >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> > > >  virt/kvm/kvm_main.c      |    1 -
> > > >  3 files changed, 26 insertions(+), 32 deletions(-)
> > > > 
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index f54a0d3..6756b3e 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -161,7 +161,7 @@ struct kvm {
> > > >  
> > > >  	struct mutex irq_lock;
> > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> > > >  	struct hlist_head mask_notifier_list;
> > > >  #endif
> > > >  
> > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > index 7af18b8..b2fa3f6 100644
> > > > --- a/virt/kvm/irq_comm.c
> > > > +++ b/virt/kvm/irq_comm.c
> > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> > > >  	 * writes to the unused one.
> > > >  	 */
> > > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > > +	rcu_read_lock();
> > > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > >  		if (e->gsi == irq) {
> > > >  			int r = e->set(e, kvm, sig_level);
> > > >  			if (r < 0)
> > > > @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > >  
> > > >  			ret = r + ((ret < 0) ? 0 : ret);
> > > >  		}
> > > > +	}
> > > > +	rcu_read_unlock();
> > > >  	return ret;
> > > >  }
> > > >  
> > > > @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > > >  
> > > >  	trace_kvm_ack_irq(irqchip, pin);
> > > >  
> > > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > > +	rcu_read_lock();
> > > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > >  		if (e->irqchip.irqchip == irqchip &&
> > > >  		    e->irqchip.pin == pin) {
> > > >  			gsi = e->gsi;
> > > >  			break;
> > > >  		}
> > > > +	}
> > > > +	rcu_read_unlock();
> > > >  
> > > >  	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> > > >  		if (kian->gsi == gsi)
> > > > @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > > >  			kimn->func(kimn, mask);
> > > >  }
> > > >  
> > > > -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> > > > -{
> > > > -	struct kvm_kernel_irq_routing_entry *e, *n;
> > > > -
> > > > -	list_for_each_entry_safe(e, n, irq_routing, link)
> > > > -		kfree(e);
> > > > -}
> > > > -
> > > >  void kvm_free_irq_routing(struct kvm *kvm)
> > > >  {
> > > > -	mutex_lock(&kvm->irq_lock);
> > > > -	__kvm_free_irq_routing(&kvm->irq_routing);
> > > > -	mutex_unlock(&kvm->irq_lock);
> > > > +	/* Called only during vm destruction. Nobody can use the pointer
> > > > +	   at this stage */
> > > > +	kfree(kvm->irq_routing);
> > > >  }
> > > >  
> > > >  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > > > @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > > >  			unsigned nr,
> > > >  			unsigned flags)
> > > >  {
> > > > -	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> > > > -	struct list_head tmp = LIST_HEAD_INIT(tmp);
> > > > -	struct kvm_kernel_irq_routing_entry *e = NULL;
> > > > +	struct kvm_kernel_irq_routing_entry *new, *old;
> > > >  	unsigned i;
> > > >  	int r;
> > > >  
> > > > +	/* last element is left zeroed and indicates the end of the array */
> > > > +	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > > 
> > > There are up to 1K entries, and each one seems to be around 32 bytes.
> > > Are there chances that we won't be able to find such a chunk of
> > > contiguous memory on a busy system?  
> > > 
> > > Since the tmp list is never traversed while it is assigned, we can, instead,
> > > build a new list as we did and simply replace list_splice with these bits from
> > > list_splice_init_rcu:
> > > 
> > > static inline void list_splice_tmp_rcu(struct list_head *tmp,
> > > 				       struct list_head *head) {
> > > 	struct list_head *first = tmp->next;
> > > 	struct list_head *last = tmp->prev;
> > > 	struct list_head *at = head->next;
> > > 	last->next = at;
> > > 	rcu_assign_pointer(head->next, first);
> > > 	first->prev = head;
> > > 	at->prev = last;
> > > }
> > > 
> > Lets keep simple things simple. If there is real concern that 3-4
> > contiguous page will not be available we can use vmalloc() here.
> 
> Hmm, 32 * (1K + 1) is usually 8-9 pages, and vmalloc is a finite
We allocate only existing entries, not the whole array, and this usually
means fewer than 20 entries. If we ever get to the point where, with the
current data structure, we use 1K entries, a much more serious problem
will be that for each injected interrupt we will have to scan 1K entries.

> resource. Maybe it's a good idea to use an array instead of a list. All
> I'm saying, RCU does not force you to do this.
> 
It doesn't, but a list shouldn't be used here in the first place.

> > But the
> > not so long term plan is to not use irq routing table for msi injection
> > (new ioctl kvm_msi_inject) and reduce table to much smaller size (may be
> > make it hash).
> 
> Why bother with an array as an intermediate step then?
Incremental changes. I can't rewrite the whole kernel with one patch.
Linus will reject it.

> 
> > > 
> > > > +
> > > > +	if (!new)
> > > > +		return -ENOMEM;
> > > > +
> > > >  	for (i = 0; i < nr; ++i) {
> > > >  		r = -EINVAL;
> > > >  		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> > > >  			goto out;
> > > >  		if (ue->flags)
> > > >  			goto out;
> > > > -		r = -ENOMEM;
> > > > -		e = kzalloc(sizeof(*e), GFP_KERNEL);
> > > > -		if (!e)
> > > > -			goto out;
> > > > -		r = setup_routing_entry(e, ue);
> > > > +		r = setup_routing_entry(new + i, ue);
> > > >  		if (r)
> > > >  			goto out;
> > > >  		++ue;
> > > > -		list_add(&e->link, &irq_list);
> > > > -		e = NULL;
> > > >  	}
> > > >  
> > > >  	mutex_lock(&kvm->irq_lock);
> > > > -	list_splice(&kvm->irq_routing, &tmp);
> > > > -	INIT_LIST_HEAD(&kvm->irq_routing);
> > > > -	list_splice(&irq_list, &kvm->irq_routing);
> > > > -	INIT_LIST_HEAD(&irq_list);
> > > > -	list_splice(&tmp, &irq_list);
> > > > +	old = kvm->irq_routing;
> > > > +	rcu_assign_pointer(kvm->irq_routing, new);
> > > >  	mutex_unlock(&kvm->irq_lock);
> > > >  
> > > > +	synchronize_rcu();
> > > > +
> > > >  	r = 0;
> > > > +	new = old;
> > > >  
> > > >  out:
> > > > -	kfree(e);
> > > > -	__kvm_free_irq_routing(&irq_list);
> > > > +	kfree(new);
> > > >  	return r;
> > > >  }
> > > >  
> > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > > index cf20dc1..24013b4 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> > > >  	if (IS_ERR(kvm))
> > > >  		goto out;
> > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > -	INIT_LIST_HEAD(&kvm->irq_routing);
> > > >  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> > > >  #endif
> > > >  
> > > > -- 
> > > > 1.6.2.1
> > > > 
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> > --
> > 			Gleb.
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 13, 2009, 1:25 p.m. UTC | #8
On Mon, Jul 13, 2009 at 09:16:47AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> >   
> >> Gleb Natapov wrote:
> >>     
> >>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>> ---
> >>>  include/linux/kvm_host.h |    2 +-
> >>>  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> >>>  virt/kvm/kvm_main.c      |    1 -
> >>>  3 files changed, 26 insertions(+), 32 deletions(-)
> >>>
> >>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >>> index f54a0d3..6756b3e 100644
> >>> --- a/include/linux/kvm_host.h
> >>> +++ b/include/linux/kvm_host.h
> >>> @@ -161,7 +161,7 @@ struct kvm {
> >>>  
> >>>  	struct mutex irq_lock;
> >>>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> >>> -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> >>> +	struct kvm_kernel_irq_routing_entry *irq_routing;
> >>>  	struct hlist_head mask_notifier_list;
> >>>  #endif
> >>>  
> >>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> >>> index 7af18b8..b2fa3f6 100644
> >>> --- a/virt/kvm/irq_comm.c
> >>> +++ b/virt/kvm/irq_comm.c
> >>> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >>>  	 * IOAPIC.  So set the bit in both. The guest will ignore
> >>>  	 * writes to the unused one.
> >>>  	 */
> >>> -	list_for_each_entry(e, &kvm->irq_routing, link)
> >>> +	rcu_read_lock();
> >>> +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> >>>   
> >>>       
> >> Hi Gleb,
> >>   I haven't had a chance to fully digest and review these patches, but
> >> one thing I did notice is that you seem to be converting from a list to
> >> an open-coded structure.  I am just curious why you made this design
> >> decision instead of using the RCU variant of list?
> >>
> >>     
> > It is not scary "open-coded structure" it's just an array :) As I responded
> > to Michael the idea is to move msis out of irq_routing, make the array
> > much smaller and either use gsi as an index in the array or use hash table
> > instead looping over all entries. For now I can justify array as more
> > cache friendly data structure as we scan it linearly.
> >   
> 
> Ok, but that might be a good thing to mention in the patch header ;)
> 
What exactly?  Besides, this is just an RFC. By the time it is
applied (if at all) I may have made the change already :)

> Kind Regards,
> -Greg
> 
> >   
> >> Regards,
> >> -Greg
> >>
> >>     
> >>>  		if (e->gsi == irq) {
> >>>  			int r = e->set(e, kvm, sig_level);
> >>>  			if (r < 0)
> >>> @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >>>  
> >>>  			ret = r + ((ret < 0) ? 0 : ret);
> >>>  		}
> >>> +	}
> >>> +	rcu_read_unlock();
> >>>  	return ret;
> >>>  }
> >>>  
> >>> @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >>>  
> >>>  	trace_kvm_ack_irq(irqchip, pin);
> >>>  
> >>> -	list_for_each_entry(e, &kvm->irq_routing, link)
> >>> +	rcu_read_lock();
> >>> +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> >>>  		if (e->irqchip.irqchip == irqchip &&
> >>>  		    e->irqchip.pin == pin) {
> >>>  			gsi = e->gsi;
> >>>  			break;
> >>>  		}
> >>> +	}
> >>> +	rcu_read_unlock();
> >>>  
> >>>  	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> >>>  		if (kian->gsi == gsi)
> >>> @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >>>  			kimn->func(kimn, mask);
> >>>  }
> >>>  
> >>> -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> >>> -{
> >>> -	struct kvm_kernel_irq_routing_entry *e, *n;
> >>> -
> >>> -	list_for_each_entry_safe(e, n, irq_routing, link)
> >>> -		kfree(e);
> >>> -}
> >>> -
> >>>  void kvm_free_irq_routing(struct kvm *kvm)
> >>>  {
> >>> -	mutex_lock(&kvm->irq_lock);
> >>> -	__kvm_free_irq_routing(&kvm->irq_routing);
> >>> -	mutex_unlock(&kvm->irq_lock);
> >>> +	/* Called only during vm destruction. Nobody can use the pointer
> >>> +	   at this stage */
> >>> +	kfree(kvm->irq_routing);
> >>>  }
> >>>  
> >>>  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> >>> @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> >>>  			unsigned nr,
> >>>  			unsigned flags)
> >>>  {
> >>> -	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> >>> -	struct list_head tmp = LIST_HEAD_INIT(tmp);
> >>> -	struct kvm_kernel_irq_routing_entry *e = NULL;
> >>> +	struct kvm_kernel_irq_routing_entry *new, *old;
> >>>  	unsigned i;
> >>>  	int r;
> >>>  
> >>> +	/* last element is left zeroed and indicates the end of the array */
> >>> +	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> >>> +
> >>> +	if (!new)
> >>> +		return -ENOMEM;
> >>> +
> >>>  	for (i = 0; i < nr; ++i) {
> >>>  		r = -EINVAL;
> >>>  		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> >>>  			goto out;
> >>>  		if (ue->flags)
> >>>  			goto out;
> >>> -		r = -ENOMEM;
> >>> -		e = kzalloc(sizeof(*e), GFP_KERNEL);
> >>> -		if (!e)
> >>> -			goto out;
> >>> -		r = setup_routing_entry(e, ue);
> >>> +		r = setup_routing_entry(new + i, ue);
> >>>  		if (r)
> >>>  			goto out;
> >>>  		++ue;
> >>> -		list_add(&e->link, &irq_list);
> >>> -		e = NULL;
> >>>  	}
> >>>  
> >>>  	mutex_lock(&kvm->irq_lock);
> >>> -	list_splice(&kvm->irq_routing, &tmp);
> >>> -	INIT_LIST_HEAD(&kvm->irq_routing);
> >>> -	list_splice(&irq_list, &kvm->irq_routing);
> >>> -	INIT_LIST_HEAD(&irq_list);
> >>> -	list_splice(&tmp, &irq_list);
> >>> +	old = kvm->irq_routing;
> >>> +	rcu_assign_pointer(kvm->irq_routing, new);
> >>>  	mutex_unlock(&kvm->irq_lock);
> >>>  
> >>> +	synchronize_rcu();
> >>> +
> >>>  	r = 0;
> >>> +	new = old;
> >>>  
> >>>  out:
> >>> -	kfree(e);
> >>> -	__kvm_free_irq_routing(&irq_list);
> >>> +	kfree(new);
> >>>  	return r;
> >>>  }
> >>>  
> >>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> >>> index cf20dc1..24013b4 100644
> >>> --- a/virt/kvm/kvm_main.c
> >>> +++ b/virt/kvm/kvm_main.c
> >>> @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> >>>  	if (IS_ERR(kvm))
> >>>  		goto out;
> >>>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> >>> -	INIT_LIST_HEAD(&kvm->irq_routing);
> >>>  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> >>>  #endif
> >>>  
> >>>   
> >>>       
> >>     
> >
> >
> >
> > --
> > 			Gleb.
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >   
> 
> 



--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gregory Haskins July 13, 2009, 1:29 p.m. UTC | #9
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:16:47AM -0400, Gregory Haskins wrote:
>   
>> Gleb Natapov wrote:
>>     
>>> On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
>>>   
>>>       
>>>> Gleb Natapov wrote:
>>>>     
>>>>         
>>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>>>> ---
>>>>>  include/linux/kvm_host.h |    2 +-
>>>>>  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
>>>>>  virt/kvm/kvm_main.c      |    1 -
>>>>>  3 files changed, 26 insertions(+), 32 deletions(-)
>>>>>
>>>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>>>> index f54a0d3..6756b3e 100644
>>>>> --- a/include/linux/kvm_host.h
>>>>> +++ b/include/linux/kvm_host.h
>>>>> @@ -161,7 +161,7 @@ struct kvm {
>>>>>  
>>>>>  	struct mutex irq_lock;
>>>>>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
>>>>> -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
>>>>> +	struct kvm_kernel_irq_routing_entry *irq_routing;
>>>>>  	struct hlist_head mask_notifier_list;
>>>>>  #endif
>>>>>  
>>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>>>> index 7af18b8..b2fa3f6 100644
>>>>> --- a/virt/kvm/irq_comm.c
>>>>> +++ b/virt/kvm/irq_comm.c
>>>>> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>>>>>  	 * IOAPIC.  So set the bit in both. The guest will ignore
>>>>>  	 * writes to the unused one.
>>>>>  	 */
>>>>> -	list_for_each_entry(e, &kvm->irq_routing, link)
>>>>> +	rcu_read_lock();
>>>>> +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>>>>>   
>>>>>       
>>>>>           
>>>> Hi Gleb,
>>>>   I haven't had a chance to fully digest and review these patches, but
>>>> one thing I did notice is that you seem to be converting from a list to
>>>> an open-coded structure.  I am just curious why you made this design
>>>> decision instead of using the RCU variant of list?
>>>>
>>>>     
>>>>         
>>> It is not scary "open-coded structure" it's just an array :) As I responded
>>> to Michael the idea is to move msis out of irq_routing, make the array
>>> much smaller and either use gsi as an index in the array or use hash table
>>> instead looping over all entries. For now I can justify array as more
>>> cache friendly data structure as we scan it linearly.
>>>   
>>>       
>> Ok, but that might be a good thing to mention in the patch header ;)
>>
>>     
> What exactly?  Besides this is just an RFC. By the time it will be
> applied (if at all) I may do the change already :)
>   

Heh, that's fine.  FWIW, I would suggest this:

"the idea is to move msis out of irq_routing, make the array
much smaller and either use gsi as an index in the array or use hash table
instead looping over all entries. For now I can justify array as more
cache friendly data structure as we scan it linearly"

Otherwise, reviewers might be curious why you are not using list_X_rcu() ;)

Kind Regards,
-Greg
Michael S. Tsirkin July 13, 2009, 1:36 p.m. UTC | #10
On Mon, Jul 13, 2009 at 04:23:49PM +0300, Gleb Natapov wrote:
> > resource. Maybe it's a good idea to use an array instead of a list. All
> > I'm saying, RCU does not force you to do this.
> > 
> It doesn't, but list shouldn't be used here in the first place.

OK, but I think the change and the reason for it should be documented in
the patch subject and description.
Marcelo Tosatti July 13, 2009, 3:55 p.m. UTC | #11
On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > Gleb Natapov wrote:
> > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > ---
> > >  include/linux/kvm_host.h |    2 +-
> > >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> > >  virt/kvm/kvm_main.c      |    1 -
> > >  3 files changed, 26 insertions(+), 32 deletions(-)
> > >
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index f54a0d3..6756b3e 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -161,7 +161,7 @@ struct kvm {
> > >  
> > >  	struct mutex irq_lock;
> > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> > >  	struct hlist_head mask_notifier_list;
> > >  #endif
> > >  
> > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > index 7af18b8..b2fa3f6 100644
> > > --- a/virt/kvm/irq_comm.c
> > > +++ b/virt/kvm/irq_comm.c
> > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> > >  	 * writes to the unused one.
> > >  	 */
> > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > +	rcu_read_lock();
> > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > >   
> > 
> > Hi Gleb,
> >   I haven't had a chance to fully digest and review these patches, but
> > one thing I did notice is that you seem to be converting from a list to
> > an open-coded structure.  I am just curious why you made this design
> > decision instead of using the RCU variant of list?
> > 
> It is not scary "open-coded structure" it's just an array :) As I responded
> to Michael the idea is to move msis out of irq_routing, make the array
> much smaller and either use gsi as an index in the array or use hash table
> instead looping over all entries. For now I can justify array as more
> cache friendly data structure as we scan it linearly.

I think it's more important to convert to a faster search mechanism (the
list walk shows up high in profiling), and then convert to RCU?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 13, 2009, 4:24 p.m. UTC | #12
On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > Gleb Natapov wrote:
> > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > ---
> > > >  include/linux/kvm_host.h |    2 +-
> > > >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> > > >  virt/kvm/kvm_main.c      |    1 -
> > > >  3 files changed, 26 insertions(+), 32 deletions(-)
> > > >
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index f54a0d3..6756b3e 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -161,7 +161,7 @@ struct kvm {
> > > >  
> > > >  	struct mutex irq_lock;
> > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> > > >  	struct hlist_head mask_notifier_list;
> > > >  #endif
> > > >  
> > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > index 7af18b8..b2fa3f6 100644
> > > > --- a/virt/kvm/irq_comm.c
> > > > +++ b/virt/kvm/irq_comm.c
> > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> > > >  	 * writes to the unused one.
> > > >  	 */
> > > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > > +	rcu_read_lock();
> > > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > >   
> > > 
> > > Hi Gleb,
> > >   I haven't had a chance to fully digest and review these patches, but
> > > one thing I did notice is that you seem to be converting from a list to
> > > an open-coded structure.  I am just curious why you made this design
> > > decision instead of using the RCU variant of list?
> > > 
> > It is not scary "open-coded structure" it's just an array :) As I responded
> > to Michael the idea is to move msis out of irq_routing, make the array
> > much smaller and either use gsi as an index in the array or use hash table
> > instead looping over all entries. For now I can justify array as more
> > cache friendly data structure as we scan it linearly.
> 
> I think its more important to convert to faster search mechanism (the
> list walk shows up high in profiling), then convert to RCU?
Why in this order? I am working on a faster search mechanism now (on top
of the series).

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti July 13, 2009, 4:27 p.m. UTC | #13
On Mon, Jul 13, 2009 at 07:24:53PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> > On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > > Gleb Natapov wrote:
> > > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > > ---
> > > > >  include/linux/kvm_host.h |    2 +-
> > > > >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> > > > >  virt/kvm/kvm_main.c      |    1 -
> > > > >  3 files changed, 26 insertions(+), 32 deletions(-)
> > > > >
> > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > > index f54a0d3..6756b3e 100644
> > > > > --- a/include/linux/kvm_host.h
> > > > > +++ b/include/linux/kvm_host.h
> > > > > @@ -161,7 +161,7 @@ struct kvm {
> > > > >  
> > > > >  	struct mutex irq_lock;
> > > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > >  	struct hlist_head mask_notifier_list;
> > > > >  #endif
> > > > >  
> > > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > > index 7af18b8..b2fa3f6 100644
> > > > > --- a/virt/kvm/irq_comm.c
> > > > > +++ b/virt/kvm/irq_comm.c
> > > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> > > > >  	 * writes to the unused one.
> > > > >  	 */
> > > > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > > > +	rcu_read_lock();
> > > > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > >   
> > > > 
> > > > Hi Gleb,
> > > >   I haven't had a chance to fully digest and review these patches, but
> > > > one thing I did notice is that you seem to be converting from a list to
> > > > an open-coded structure.  I am just curious why you made this design
> > > > decision instead of using the RCU variant of list?
> > > > 
> > > It is not scary "open-coded structure" it's just an array :) As I responded
> > > to Michael the idea is to move msis out of irq_routing, make the array
> > > much smaller and either use gsi as an index in the array or use hash table
> > > instead looping over all entries. For now I can justify array as more
> > > cache friendly data structure as we scan it linearly.
> > 
> > I think its more important to convert to faster search mechanism (the
> > list walk shows up high in profiling), then convert to RCU?
> Why in this order? I am working on faster search mechanism now (on top
> of the series).

Because as Michael mentioned we can use slots_lock (should be renamed
to global_lock) instead of RCU on the write-side.

Note it moves a lot of the burden to the writer side, but it's much simpler
than RCU and you stop the spread of locks. Needs to be discussed...

> 
> --
> 			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 13, 2009, 4:33 p.m. UTC | #14
On Mon, Jul 13, 2009 at 01:27:38PM -0300, Marcelo Tosatti wrote:
> On Mon, Jul 13, 2009 at 07:24:53PM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> > > On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > > > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > > > Gleb Natapov wrote:
> > > > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > > > ---
> > > > > >  include/linux/kvm_host.h |    2 +-
> > > > > >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> > > > > >  virt/kvm/kvm_main.c      |    1 -
> > > > > >  3 files changed, 26 insertions(+), 32 deletions(-)
> > > > > >
> > > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > > > index f54a0d3..6756b3e 100644
> > > > > > --- a/include/linux/kvm_host.h
> > > > > > +++ b/include/linux/kvm_host.h
> > > > > > @@ -161,7 +161,7 @@ struct kvm {
> > > > > >  
> > > > > >  	struct mutex irq_lock;
> > > > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > > > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > > > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > > >  	struct hlist_head mask_notifier_list;
> > > > > >  #endif
> > > > > >  
> > > > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > > > index 7af18b8..b2fa3f6 100644
> > > > > > --- a/virt/kvm/irq_comm.c
> > > > > > +++ b/virt/kvm/irq_comm.c
> > > > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > > >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> > > > > >  	 * writes to the unused one.
> > > > > >  	 */
> > > > > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > > > > +	rcu_read_lock();
> > > > > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > > >   
> > > > > 
> > > > > Hi Gleb,
> > > > >   I haven't had a chance to fully digest and review these patches, but
> > > > > one thing I did notice is that you seem to be converting from a list to
> > > > > an open-coded structure.  I am just curious why you made this design
> > > > > decision instead of using the RCU variant of list?
> > > > > 
> > > > It is not scary "open-coded structure" it's just an array :) As I responded
> > > > to Michael the idea is to move msis out of irq_routing, make the array
> > > > much smaller and either use gsi as an index in the array or use hash table
> > > > instead looping over all entries. For now I can justify array as more
> > > > cache friendly data structure as we scan it linearly.
> > > 
> > > I think its more important to convert to faster search mechanism (the
> > > list walk shows up high in profiling), then convert to RCU?
> > Why in this order? I am working on faster search mechanism now (on top
> > of the series).
> 
> Because as Michael mentioned we can use slots_lock (should be renamed
> to global_lock) instead of RCU on the write-side.
> 
I don't get it. The point of RCU is to get rid of the reader's lock. If
I have to take slots_lock on each EOI, I have achieved nothing.

> Note it moves a lot of burden to the writer side, but its much simpler
> than RCU and you stop the spread of locks. Needs to be discussed...
> 
I much prefer to have many well-defined locks with well-understood
scope than a small number of global locks that are split ad hoc when
a deadlock is discovered (lock->irq_lock).

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti July 13, 2009, 4:42 p.m. UTC | #15
On Mon, Jul 13, 2009 at 07:33:30PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 01:27:38PM -0300, Marcelo Tosatti wrote:
> > On Mon, Jul 13, 2009 at 07:24:53PM +0300, Gleb Natapov wrote:
> > > On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> > > > On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > > > > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > > > > Gleb Natapov wrote:
> > > > > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > > > > ---
> > > > > > >  include/linux/kvm_host.h |    2 +-
> > > > > > >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> > > > > > >  virt/kvm/kvm_main.c      |    1 -
> > > > > > >  3 files changed, 26 insertions(+), 32 deletions(-)
> > > > > > >
> > > > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > > > > index f54a0d3..6756b3e 100644
> > > > > > > --- a/include/linux/kvm_host.h
> > > > > > > +++ b/include/linux/kvm_host.h
> > > > > > > @@ -161,7 +161,7 @@ struct kvm {
> > > > > > >  
> > > > > > >  	struct mutex irq_lock;
> > > > > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > > > > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > > > > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > > > >  	struct hlist_head mask_notifier_list;
> > > > > > >  #endif
> > > > > > >  
> > > > > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > > > > index 7af18b8..b2fa3f6 100644
> > > > > > > --- a/virt/kvm/irq_comm.c
> > > > > > > +++ b/virt/kvm/irq_comm.c
> > > > > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > > > >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> > > > > > >  	 * writes to the unused one.
> > > > > > >  	 */
> > > > > > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > > > > > +	rcu_read_lock();
> > > > > > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > > > >   
> > > > > > 
> > > > > > Hi Gleb,
> > > > > >   I haven't had a chance to fully digest and review these patches, but
> > > > > > one thing I did notice is that you seem to be converting from a list to
> > > > > > an open-coded structure.  I am just curious why you made this design
> > > > > > decision instead of using the RCU variant of list?
> > > > > > 
> > > > > It is not scary "open-coded structure" it's just an array :) As I responded
> > > > > to Michael the idea is to move msis out of irq_routing, make the array
> > > > > much smaller and either use gsi as an index in the array or use hash table
> > > > > instead looping over all entries. For now I can justify array as more
> > > > > cache friendly data structure as we scan it linearly.
> > > > 
> > > > I think its more important to convert to faster search mechanism (the
> > > > list walk shows up high in profiling), then convert to RCU?
> > > Why in this order? I am working on faster search mechanism now (on top
> > > of the series).
> > 
> > Because as Michael mentioned we can use slots_lock (should be renamed
> > to global_lock) instead of RCU on the write-side.
> > 
> I don't get it. The point for RCU is to get rid of reader's lock. If
> I'll have to take slot_lock on each EOI I achieved nothing.

You already take slots_lock for read on every exit.

> > than RCU and you stop the spread of locks. Needs to be discussed...
> > 
> I much prefer to have many well defined locks with well understood
> scope, then a small number of globals locks that are split ad-hoc when
> deadlock is discovered (lock->irq_lock).

OK.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 13, 2009, 4:44 p.m. UTC | #16
On Mon, Jul 13, 2009 at 01:42:13PM -0300, Marcelo Tosatti wrote:
> On Mon, Jul 13, 2009 at 07:33:30PM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 01:27:38PM -0300, Marcelo Tosatti wrote:
> > > On Mon, Jul 13, 2009 at 07:24:53PM +0300, Gleb Natapov wrote:
> > > > On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> > > > > On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > > > > > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > > > > > Gleb Natapov wrote:
> > > > > > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > > > > > ---
> > > > > > > >  include/linux/kvm_host.h |    2 +-
> > > > > > > >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> > > > > > > >  virt/kvm/kvm_main.c      |    1 -
> > > > > > > >  3 files changed, 26 insertions(+), 32 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > > > > > index f54a0d3..6756b3e 100644
> > > > > > > > --- a/include/linux/kvm_host.h
> > > > > > > > +++ b/include/linux/kvm_host.h
> > > > > > > > @@ -161,7 +161,7 @@ struct kvm {
> > > > > > > >  
> > > > > > > >  	struct mutex irq_lock;
> > > > > > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > > > > > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > > > > > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > > > > >  	struct hlist_head mask_notifier_list;
> > > > > > > >  #endif
> > > > > > > >  
> > > > > > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > > > > > index 7af18b8..b2fa3f6 100644
> > > > > > > > --- a/virt/kvm/irq_comm.c
> > > > > > > > +++ b/virt/kvm/irq_comm.c
> > > > > > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > > > > >  	 * IOAPIC.  So set the bit in both. The guest will ignore
> > > > > > > >  	 * writes to the unused one.
> > > > > > > >  	 */
> > > > > > > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > > > > > > +	rcu_read_lock();
> > > > > > > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > > > > >   
> > > > > > > 
> > > > > > > Hi Gleb,
> > > > > > >   I haven't had a chance to fully digest and review these patches, but
> > > > > > > one thing I did notice is that you seem to be converting from a list to
> > > > > > > an open-coded structure.  I am just curious why you made this design
> > > > > > > decision instead of using the RCU variant of list?
> > > > > > > 
> > > > > > It is not scary "open-coded structure" it's just an array :) As I responded
> > > > > > to Michael the idea is to move msis out of irq_routing, make the array
> > > > > > much smaller and either use gsi as an index in the array or use hash table
> > > > > > instead looping over all entries. For now I can justify array as more
> > > > > > cache friendly data structure as we scan it linearly.
> > > > > 
> > > > > I think its more important to convert to faster search mechanism (the
> > > > > list walk shows up high in profiling), then convert to RCU?
> > > > Why in this order? I am working on faster search mechanism now (on top
> > > > of the series).
> > > 
> > > Because as Michael mentioned we can use slots_lock (should be renamed
> > > to global_lock) instead of RCU on the write-side.
> > > 
> > I don't get it. The point for RCU is to get rid of reader's lock. If
> > I'll have to take slot_lock on each EOI I achieved nothing.
> 
> You already take slots_lock for read on every exit.
> 
We should fix that, not add even more users. Shouldn't we?

> > > than RCU and you stop the spread of locks. Needs to be discussed...
> > > 
> > I much prefer to have many well defined locks with well understood
> > scope, then a small number of globals locks that are split ad-hoc when
> > deadlock is discovered (lock->irq_lock).
> 
> OK.
> 

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti July 13, 2009, 4:45 p.m. UTC | #17
On Mon, Jul 13, 2009 at 07:44:38PM +0300, Gleb Natapov wrote:
> > > I don't get it. The point for RCU is to get rid of reader's lock. If
> > > I'll have to take slot_lock on each EOI I achieved nothing.
> > 
> > You already take slots_lock for read on every exit.
> > 
> We should fix that, not add even more users. Shouldn't we?

Yes. Please send both patchsets as one when you resend, then; it's easier
to review.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 13, 2009, 4:54 p.m. UTC | #18
On Mon, Jul 13, 2009 at 01:45:07PM -0300, Marcelo Tosatti wrote:
> On Mon, Jul 13, 2009 at 07:44:38PM +0300, Gleb Natapov wrote:
> > > > I don't get it. The point for RCU is to get rid of reader's lock. If
> > > > I'll have to take slot_lock on each EOI I achieved nothing.
> > > 
> > > You already take slots_lock for read on every exit.
> > > 
> > We should fix that, not add even more users. Shouldn't we?
> 
> Yes. Please send both patchsets as one when you resend then, its easier 
> to review.
OK.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f54a0d3..6756b3e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -161,7 +161,7 @@  struct kvm {
 
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
+	struct kvm_kernel_irq_routing_entry *irq_routing;
 	struct hlist_head mask_notifier_list;
 #endif
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 7af18b8..b2fa3f6 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -148,7 +148,8 @@  int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	list_for_each_entry(e, &kvm->irq_routing, link)
+	rcu_read_lock();
+	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
 		if (e->gsi == irq) {
 			int r = e->set(e, kvm, sig_level);
 			if (r < 0)
@@ -156,6 +157,8 @@  int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 
 			ret = r + ((ret < 0) ? 0 : ret);
 		}
+	}
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -168,12 +171,15 @@  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	list_for_each_entry(e, &kvm->irq_routing, link)
+	rcu_read_lock();
+	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
 		if (e->irqchip.irqchip == irqchip &&
 		    e->irqchip.pin == pin) {
 			gsi = e->gsi;
 			break;
 		}
+	}
+	rcu_read_unlock();
 
 	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
 		if (kian->gsi == gsi)
@@ -264,19 +270,11 @@  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 			kimn->func(kimn, mask);
 }
 
-static void __kvm_free_irq_routing(struct list_head *irq_routing)
-{
-	struct kvm_kernel_irq_routing_entry *e, *n;
-
-	list_for_each_entry_safe(e, n, irq_routing, link)
-		kfree(e);
-}
-
 void kvm_free_irq_routing(struct kvm *kvm)
 {
-	mutex_lock(&kvm->irq_lock);
-	__kvm_free_irq_routing(&kvm->irq_routing);
-	mutex_unlock(&kvm->irq_lock);
+	/* Called only during vm destruction. Nobody can use the pointer
+	   at this stage */
+	kfree(kvm->irq_routing);
 }
 
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -326,43 +324,40 @@  int kvm_set_irq_routing(struct kvm *kvm,
 			unsigned nr,
 			unsigned flags)
 {
-	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
-	struct list_head tmp = LIST_HEAD_INIT(tmp);
-	struct kvm_kernel_irq_routing_entry *e = NULL;
+	struct kvm_kernel_irq_routing_entry *new, *old;
 	unsigned i;
 	int r;
 
+	/* last element is left zeroed and indicates the end of the array */
+	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
+
+	if (!new)
+		return -ENOMEM;
+
 	for (i = 0; i < nr; ++i) {
 		r = -EINVAL;
 		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
 			goto out;
 		if (ue->flags)
 			goto out;
-		r = -ENOMEM;
-		e = kzalloc(sizeof(*e), GFP_KERNEL);
-		if (!e)
-			goto out;
-		r = setup_routing_entry(e, ue);
+		r = setup_routing_entry(new + i, ue);
 		if (r)
 			goto out;
 		++ue;
-		list_add(&e->link, &irq_list);
-		e = NULL;
 	}
 
 	mutex_lock(&kvm->irq_lock);
-	list_splice(&kvm->irq_routing, &tmp);
-	INIT_LIST_HEAD(&kvm->irq_routing);
-	list_splice(&irq_list, &kvm->irq_routing);
-	INIT_LIST_HEAD(&irq_list);
-	list_splice(&tmp, &irq_list);
+	old = kvm->irq_routing;
+	rcu_assign_pointer(kvm->irq_routing, new);
 	mutex_unlock(&kvm->irq_lock);
 
+	synchronize_rcu();
+
 	r = 0;
+	new = old;
 
 out:
-	kfree(e);
-	__kvm_free_irq_routing(&irq_list);
+	kfree(new);
 	return r;
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cf20dc1..24013b4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -945,7 +945,6 @@  static struct kvm *kvm_create_vm(void)
 	if (IS_ERR(kvm))
 		goto out;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-	INIT_LIST_HEAD(&kvm->irq_routing);
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 #endif