diff mbox

[10/10] Change irq routing table to use gsi indexed array.

Message ID 1247581845-7625-11-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov July 14, 2009, 2:30 p.m. UTC
Use gsi indexed array instead of scanning all entries on each interrupt
injection. Also maintain back mapping from irqchip/pin to gsi to speed up
interrupt acknowledgment notifications.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 include/linux/kvm_host.h |   11 ++++++-
 virt/kvm/irq_comm.c      |   62 ++++++++++++++++++++++++++++-----------------
 2 files changed, 47 insertions(+), 26 deletions(-)

Comments

Marcelo Tosatti July 15, 2009, 6:18 p.m. UTC | #1
On Tue, Jul 14, 2009 at 05:30:45PM +0300, Gleb Natapov wrote:
> Use gsi indexed array instead of scanning all entries on each interrupt
> injection. Also maintain back mapping from irqchip/pin to gsi to speedup
> interrupt acknowledgment notifications.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  include/linux/kvm_host.h |   11 ++++++-
>  virt/kvm/irq_comm.c      |   62 ++++++++++++++++++++++++++++-----------------
>  2 files changed, 47 insertions(+), 26 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index aa64d0d..ae6cbf1 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -128,7 +128,14 @@ struct kvm_kernel_irq_routing_entry {
>  		} irqchip;
>  		struct msi_msg msi;
>  	};
> -	struct list_head link;
> +	struct hlist_node link;
> +};
> +
> +struct kvm_irq_routing_table {
> +	int chip[3][KVM_IOAPIC_NUM_PINS];
> +	struct kvm_kernel_irq_routing_entry *rt_entries;
> +	u32 max_gsi;
> +	struct hlist_head map[0];
>  };
>  
>  struct kvm {
> @@ -165,7 +172,7 @@ struct kvm {
>  #endif
>  
>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> -	struct kvm_kernel_irq_routing_entry *irq_routing;
> +	struct kvm_irq_routing_table *irq_routing;
>  	spinlock_t irq_routing_lock;
>  	struct hlist_head mask_notifier_list;
>  	struct hlist_head irq_ack_notifier_list;
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index c54a28b..da643d4 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -125,6 +125,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>  	struct kvm_kernel_irq_routing_entry *e;
>  	unsigned long *irq_state, sig_level;
>  	int ret = -1;
> +	struct kvm_irq_routing_table *irq_rt;
> +	struct hlist_node *n;
>  
>  	trace_kvm_set_irq(irq, level, irq_source_id);
>  
> @@ -147,14 +149,13 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>  	 * writes to the unused one.
>  	 */
>  	rcu_read_lock();
> -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> -		if (e->gsi == irq) {
> -			int r = e->set(e, kvm, sig_level);
> -			if (r < 0)
> -				continue;
> +	irq_rt = rcu_dereference(kvm->irq_routing);
> +	hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
> +		int r = e->set(e, kvm, sig_level);
> +		if (r < 0)
> +			continue;
>  
> -			ret = r + ((ret < 0) ? 0 : ret);
> -		}
> +		ret = r + ((ret < 0) ? 0 : ret);
>  	}
>  	rcu_read_unlock();
>  	return ret;
> @@ -162,21 +163,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>  
>  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>  {
> -	struct kvm_kernel_irq_routing_entry *e;
>  	struct kvm_irq_ack_notifier *kian;
>  	struct hlist_node *n;
> -	unsigned gsi = pin;
> +	unsigned gsi;
>  
>  	trace_kvm_ack_irq(irqchip, pin);
>  
>  	rcu_read_lock();
> -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> -		if (e->irqchip.irqchip == irqchip &&
> -		    e->irqchip.pin == pin) {
> -			gsi = e->gsi;
> -			break;
> -		}
> -	}
> +	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
> +	if (gsi == -1)
> +		gsi = pin;
>  
>  	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>  		if (kian->gsi == gsi)
> @@ -277,7 +273,8 @@ void kvm_free_irq_routing(struct kvm *kvm)
>  	kfree(kvm->irq_routing);
>  }
>  
> -static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> +static int setup_routing_entry(struct kvm_irq_routing_table *rt,
> +			       struct kvm_kernel_irq_routing_entry *e,
>  			       const struct kvm_irq_routing_entry *ue)
>  {
>  	int r = -EINVAL;
> @@ -303,6 +300,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
>  		}
>  		e->irqchip.irqchip = ue->u.irqchip.irqchip;
>  		e->irqchip.pin = ue->u.irqchip.pin + delta;
> +		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
>  		break;
>  	case KVM_IRQ_ROUTING_MSI:
>  		e->set = kvm_set_msi;
> @@ -313,6 +311,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
>  	default:
>  		goto out;
>  	}
> +
> +	hlist_add_head(&e->link, &rt->map[e->gsi]);
>  	r = 0;
>  out:
>  	return r;
> @@ -324,23 +324,37 @@ int kvm_set_irq_routing(struct kvm *kvm,
>  			unsigned nr,
>  			unsigned flags)
>  {
> -	struct kvm_kernel_irq_routing_entry *new, *old;
> -	unsigned i;
> +	struct kvm_irq_routing_table *new, *old;
> +	u32 i, j, max_gsi = 0;
>  	int r;
>  
> -	/* last elemet is left zeored and indicates the end of the array */
> -	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> +	for (i = 0; i < nr; ++i) {
> +		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
> +			return -EINVAL;
> +		max_gsi = max(max_gsi, ue[i].gsi);
> +	}
> +
> +	max_gsi += 1;
> +
> +	new = kzalloc(sizeof(*new) + (max_gsi * sizeof(struct hlist_head)) +
> +		      (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
> +		      GFP_KERNEL);

Why don't you allocate the hlist_head's and the routing entries
separately?

>  
>  	if (!new)
>  		return -ENOMEM;
>  
> +	new->rt_entries = (void *)&new->map[max_gsi];
> +
> +	new->max_gsi = max_gsi;
> +	for (i = 0; i < 3; i++)
> +		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
> +			new->chip[i][j] = -1;
> +

Should use something else instead of 3. Maybe dynamic for multiple
IOAPIC support (but you can argue that's another problem).

>  	for (i = 0; i < nr; ++i) {
>  		r = -EINVAL;
> -		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> -			goto out;
>  		if (ue->flags)
>  			goto out;
> -		r = setup_routing_entry(new + i, ue);
> +		r = setup_routing_entry(new, &new->rt_entries[i], ue);
>  		if (r)
>  			goto out;
>  		++ue;
> -- 
> 1.6.2.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin July 15, 2009, 7:17 p.m. UTC | #2
On Tue, Jul 14, 2009 at 05:30:45PM +0300, Gleb Natapov wrote:
> @@ -147,14 +149,13 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>  	 * writes to the unused one.
>  	 */
>  	rcu_read_lock();
> -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> -		if (e->gsi == irq) {
> -			int r = e->set(e, kvm, sig_level);
> -			if (r < 0)
> -				continue;
> +	irq_rt = rcu_dereference(kvm->irq_routing);
> +	hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {

Don't you need to range-check irq? E.g. with irqfd, gsi is
controlled by guest.

> +		int r = e->set(e, kvm, sig_level);
> +		if (r < 0)
> +			continue;
>  
> -			ret = r + ((ret < 0) ? 0 : ret);
> -		}
> +		ret = r + ((ret < 0) ? 0 : ret);
>  	}
>  	rcu_read_unlock();
>  	return ret;
> @@ -162,21 +163,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>  
>  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>  {
> -	struct kvm_kernel_irq_routing_entry *e;
>  	struct kvm_irq_ack_notifier *kian;
>  	struct hlist_node *n;
> -	unsigned gsi = pin;
> +	unsigned gsi;
>  
>  	trace_kvm_ack_irq(irqchip, pin);
>  
>  	rcu_read_lock();
> -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> -		if (e->irqchip.irqchip == irqchip &&
> -		    e->irqchip.pin == pin) {
> -			gsi = e->gsi;
> -			break;
> -		}
> -	}
> +	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];

And possibly here as well. Can guest control pin?

> +	if (gsi == -1)
> +		gsi = pin;
>  
>  	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>  		if (kian->gsi == gsi)
> @@ -277,7 +273,8 @@ void kvm_free_irq_routing(struct kvm *kvm)
>  	kfree(kvm->irq_routing);
>  }
>  
> -static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> +static int setup_routing_entry(struct kvm_irq_routing_table *rt,
> +			       struct kvm_kernel_irq_routing_entry *e,
>  			       const struct kvm_irq_routing_entry *ue)
>  {
>  	int r = -EINVAL;
> @@ -303,6 +300,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
>  		}
>  		e->irqchip.irqchip = ue->u.irqchip.irqchip;
>  		e->irqchip.pin = ue->u.irqchip.pin + delta;
> +		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
>  		break;
>  	case KVM_IRQ_ROUTING_MSI:
>  		e->set = kvm_set_msi;
> @@ -313,6 +311,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
>  	default:
>  		goto out;
>  	}
> +
> +	hlist_add_head(&e->link, &rt->map[e->gsi]);
>  	r = 0;
>  out:
>  	return r;
> @@ -324,23 +324,37 @@ int kvm_set_irq_routing(struct kvm *kvm,
>  			unsigned nr,
>  			unsigned flags)
>  {
> -	struct kvm_kernel_irq_routing_entry *new, *old;
> -	unsigned i;
> +	struct kvm_irq_routing_table *new, *old;
> +	u32 i, j, max_gsi = 0;
>  	int r;
>  
> -	/* last elemet is left zeored and indicates the end of the array */
> -	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> +	for (i = 0; i < nr; ++i) {
> +		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
> +			return -EINVAL;
> +		max_gsi = max(max_gsi, ue[i].gsi);
> +	}
> +
> +	max_gsi += 1;
> +
> +	new = kzalloc(sizeof(*new) + (max_gsi * sizeof(struct hlist_head)) +
> +		      (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
> +		      GFP_KERNEL);
>  
>  	if (!new)
>  		return -ENOMEM;
>  
> +	new->rt_entries = (void *)&new->map[max_gsi];
> +
> +	new->max_gsi = max_gsi;
> +	for (i = 0; i < 3; i++)
> +		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
> +			new->chip[i][j] = -1;
> +
>  	for (i = 0; i < nr; ++i) {
>  		r = -EINVAL;
> -		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> -			goto out;
>  		if (ue->flags)
>  			goto out;
> -		r = setup_routing_entry(new + i, ue);
> +		r = setup_routing_entry(new, &new->rt_entries[i], ue);
>  		if (r)
>  			goto out;
>  		++ue;
> -- 
> 1.6.2.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 15, 2009, 8:48 p.m. UTC | #3
On Wed, Jul 15, 2009 at 10:17:22PM +0300, Michael S. Tsirkin wrote:
> On Tue, Jul 14, 2009 at 05:30:45PM +0300, Gleb Natapov wrote:
> > @@ -147,14 +149,13 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >  	 * writes to the unused one.
> >  	 */
> >  	rcu_read_lock();
> > -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > -		if (e->gsi == irq) {
> > -			int r = e->set(e, kvm, sig_level);
> > -			if (r < 0)
> > -				continue;
> > +	irq_rt = rcu_dereference(kvm->irq_routing);
> > +	hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
> 
> Don't you need to range-check irq? E.g. with irqfd, gsi is
> controlled by guest.
> 
Yes, I need to add range checking. Good point.

> > +		int r = e->set(e, kvm, sig_level);
> > +		if (r < 0)
> > +			continue;
> >  
> > -			ret = r + ((ret < 0) ? 0 : ret);
> > -		}
> > +		ret = r + ((ret < 0) ? 0 : ret);
> >  	}
> >  	rcu_read_unlock();
> >  	return ret;
> > @@ -162,21 +163,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >  
> >  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >  {
> > -	struct kvm_kernel_irq_routing_entry *e;
> >  	struct kvm_irq_ack_notifier *kian;
> >  	struct hlist_node *n;
> > -	unsigned gsi = pin;
> > +	unsigned gsi;
> >  
> >  	trace_kvm_ack_irq(irqchip, pin);
> >  
> >  	rcu_read_lock();
> > -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > -		if (e->irqchip.irqchip == irqchip &&
> > -		    e->irqchip.pin == pin) {
> > -			gsi = e->gsi;
> > -			break;
> > -		}
> > -	}
> > +	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
> 
> And possibly here as well. Can guest control pin?
> 
> > +	if (gsi == -1)
> > +		gsi = pin;
> >  
> >  	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >  		if (kian->gsi == gsi)
> > @@ -277,7 +273,8 @@ void kvm_free_irq_routing(struct kvm *kvm)
> >  	kfree(kvm->irq_routing);
> >  }
> >  
> > -static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > +static int setup_routing_entry(struct kvm_irq_routing_table *rt,
> > +			       struct kvm_kernel_irq_routing_entry *e,
> >  			       const struct kvm_irq_routing_entry *ue)
> >  {
> >  	int r = -EINVAL;
> > @@ -303,6 +300,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> >  		}
> >  		e->irqchip.irqchip = ue->u.irqchip.irqchip;
> >  		e->irqchip.pin = ue->u.irqchip.pin + delta;
> > +		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
> >  		break;
> >  	case KVM_IRQ_ROUTING_MSI:
> >  		e->set = kvm_set_msi;
> > @@ -313,6 +311,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> >  	default:
> >  		goto out;
> >  	}
> > +
> > +	hlist_add_head(&e->link, &rt->map[e->gsi]);
> >  	r = 0;
> >  out:
> >  	return r;
> > @@ -324,23 +324,37 @@ int kvm_set_irq_routing(struct kvm *kvm,
> >  			unsigned nr,
> >  			unsigned flags)
> >  {
> > -	struct kvm_kernel_irq_routing_entry *new, *old;
> > -	unsigned i;
> > +	struct kvm_irq_routing_table *new, *old;
> > +	u32 i, j, max_gsi = 0;
> >  	int r;
> >  
> > -	/* last elemet is left zeored and indicates the end of the array */
> > -	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > +	for (i = 0; i < nr; ++i) {
> > +		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
> > +			return -EINVAL;
> > +		max_gsi = max(max_gsi, ue[i].gsi);
> > +	}
> > +
> > +	max_gsi += 1;
> > +
> > +	new = kzalloc(sizeof(*new) + (max_gsi * sizeof(struct hlist_head)) +
> > +		      (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
> > +		      GFP_KERNEL);
> >  
> >  	if (!new)
> >  		return -ENOMEM;
> >  
> > +	new->rt_entries = (void *)&new->map[max_gsi];
> > +
> > +	new->max_gsi = max_gsi;
> > +	for (i = 0; i < 3; i++)
> > +		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
> > +			new->chip[i][j] = -1;
> > +
> >  	for (i = 0; i < nr; ++i) {
> >  		r = -EINVAL;
> > -		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> > -			goto out;
> >  		if (ue->flags)
> >  			goto out;
> > -		r = setup_routing_entry(new + i, ue);
> > +		r = setup_routing_entry(new, &new->rt_entries[i], ue);
> >  		if (r)
> >  			goto out;
> >  		++ue;
> > -- 
> > 1.6.2.1
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 15, 2009, 8:52 p.m. UTC | #4
On Wed, Jul 15, 2009 at 03:18:00PM -0300, Marcelo Tosatti wrote:
> On Tue, Jul 14, 2009 at 05:30:45PM +0300, Gleb Natapov wrote:
> > Use gsi indexed array instead of scanning all entries on each interrupt
> > injection. Also maintain back mapping from irqchip/pin to gsi to speedup
> > interrupt acknowledgment notifications.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  include/linux/kvm_host.h |   11 ++++++-
> >  virt/kvm/irq_comm.c      |   62 ++++++++++++++++++++++++++++-----------------
> >  2 files changed, 47 insertions(+), 26 deletions(-)
> > 
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index aa64d0d..ae6cbf1 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -128,7 +128,14 @@ struct kvm_kernel_irq_routing_entry {
> >  		} irqchip;
> >  		struct msi_msg msi;
> >  	};
> > -	struct list_head link;
> > +	struct hlist_node link;
> > +};
> > +
> > +struct kvm_irq_routing_table {
> > +	int chip[3][KVM_IOAPIC_NUM_PINS];
> > +	struct kvm_kernel_irq_routing_entry *rt_entries;
> > +	u32 max_gsi;
> > +	struct hlist_head map[0];
> >  };
> >  
> >  struct kvm {
> > @@ -165,7 +172,7 @@ struct kvm {
> >  #endif
> >  
> >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > -	struct kvm_kernel_irq_routing_entry *irq_routing;
> > +	struct kvm_irq_routing_table *irq_routing;
> >  	spinlock_t irq_routing_lock;
> >  	struct hlist_head mask_notifier_list;
> >  	struct hlist_head irq_ack_notifier_list;
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index c54a28b..da643d4 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -125,6 +125,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >  	struct kvm_kernel_irq_routing_entry *e;
> >  	unsigned long *irq_state, sig_level;
> >  	int ret = -1;
> > +	struct kvm_irq_routing_table *irq_rt;
> > +	struct hlist_node *n;
> >  
> >  	trace_kvm_set_irq(irq, level, irq_source_id);
> >  
> > @@ -147,14 +149,13 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >  	 * writes to the unused one.
> >  	 */
> >  	rcu_read_lock();
> > -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > -		if (e->gsi == irq) {
> > -			int r = e->set(e, kvm, sig_level);
> > -			if (r < 0)
> > -				continue;
> > +	irq_rt = rcu_dereference(kvm->irq_routing);
> > +	hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
> > +		int r = e->set(e, kvm, sig_level);
> > +		if (r < 0)
> > +			continue;
> >  
> > -			ret = r + ((ret < 0) ? 0 : ret);
> > -		}
> > +		ret = r + ((ret < 0) ? 0 : ret);
> >  	}
> >  	rcu_read_unlock();
> >  	return ret;
> > @@ -162,21 +163,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >  
> >  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >  {
> > -	struct kvm_kernel_irq_routing_entry *e;
> >  	struct kvm_irq_ack_notifier *kian;
> >  	struct hlist_node *n;
> > -	unsigned gsi = pin;
> > +	unsigned gsi;
> >  
> >  	trace_kvm_ack_irq(irqchip, pin);
> >  
> >  	rcu_read_lock();
> > -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > -		if (e->irqchip.irqchip == irqchip &&
> > -		    e->irqchip.pin == pin) {
> > -			gsi = e->gsi;
> > -			break;
> > -		}
> > -	}
> > +	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
> > +	if (gsi == -1)
> > +		gsi = pin;
> >  
> >  	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >  		if (kian->gsi == gsi)
> > @@ -277,7 +273,8 @@ void kvm_free_irq_routing(struct kvm *kvm)
> >  	kfree(kvm->irq_routing);
> >  }
> >  
> > -static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > +static int setup_routing_entry(struct kvm_irq_routing_table *rt,
> > +			       struct kvm_kernel_irq_routing_entry *e,
> >  			       const struct kvm_irq_routing_entry *ue)
> >  {
> >  	int r = -EINVAL;
> > @@ -303,6 +300,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> >  		}
> >  		e->irqchip.irqchip = ue->u.irqchip.irqchip;
> >  		e->irqchip.pin = ue->u.irqchip.pin + delta;
> > +		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
> >  		break;
> >  	case KVM_IRQ_ROUTING_MSI:
> >  		e->set = kvm_set_msi;
> > @@ -313,6 +311,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> >  	default:
> >  		goto out;
> >  	}
> > +
> > +	hlist_add_head(&e->link, &rt->map[e->gsi]);
> >  	r = 0;
> >  out:
> >  	return r;
> > @@ -324,23 +324,37 @@ int kvm_set_irq_routing(struct kvm *kvm,
> >  			unsigned nr,
> >  			unsigned flags)
> >  {
> > -	struct kvm_kernel_irq_routing_entry *new, *old;
> > -	unsigned i;
> > +	struct kvm_irq_routing_table *new, *old;
> > +	u32 i, j, max_gsi = 0;
> >  	int r;
> >  
> > -	/* last elemet is left zeored and indicates the end of the array */
> > -	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > +	for (i = 0; i < nr; ++i) {
> > +		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
> > +			return -EINVAL;
> > +		max_gsi = max(max_gsi, ue[i].gsi);
> > +	}
> > +
> > +	max_gsi += 1;
> > +
> > +	new = kzalloc(sizeof(*new) + (max_gsi * sizeof(struct hlist_head)) +
> > +		      (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
> > +		      GFP_KERNEL);
> 
> Why don't you allocate the hlist_head's and the routing entries
> separately?
> 
I prefer it that way because cleanup after error is much easier. What
are the disadvantages?

> >  
> >  	if (!new)
> >  		return -ENOMEM;
> >  
> > +	new->rt_entries = (void *)&new->map[max_gsi];
> > +
> > +	new->max_gsi = max_gsi;
> > +	for (i = 0; i < 3; i++)
> > +		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
> > +			new->chip[i][j] = -1;
> > +
> 
> Should use something else instead of 3. Maybe dynamic for multiple
> IOAPIC's support (but you can argue thats another problem).
> 
This is (another problem). The code has 1 IOAPIC hardcoded pretty deeply
even at user/kernel API level. We will solve it some day.
 
> >  	for (i = 0; i < nr; ++i) {
> >  		r = -EINVAL;
> > -		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> > -			goto out;
> >  		if (ue->flags)
> >  			goto out;
> > -		r = setup_routing_entry(new + i, ue);
> > +		r = setup_routing_entry(new, &new->rt_entries[i], ue);
> >  		if (r)
> >  			goto out;
> >  		++ue;
> > -- 
> > 1.6.2.1
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti July 15, 2009, 9:42 p.m. UTC | #5
On Wed, Jul 15, 2009 at 11:52:24PM +0300, Gleb Natapov wrote:
> On Wed, Jul 15, 2009 at 03:18:00PM -0300, Marcelo Tosatti wrote:
> > On Tue, Jul 14, 2009 at 05:30:45PM +0300, Gleb Natapov wrote:
> > > Use gsi indexed array instead of scanning all entries on each interrupt
> > > injection. Also maintain back mapping from irqchip/pin to gsi to speedup
> > > interrupt acknowledgment notifications.
> > > 
> > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > ---
> > >  include/linux/kvm_host.h |   11 ++++++-
> > >  virt/kvm/irq_comm.c      |   62 ++++++++++++++++++++++++++++-----------------
> > >  2 files changed, 47 insertions(+), 26 deletions(-)
> > > 
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index aa64d0d..ae6cbf1 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -128,7 +128,14 @@ struct kvm_kernel_irq_routing_entry {
> > >  		} irqchip;
> > >  		struct msi_msg msi;
> > >  	};
> > > -	struct list_head link;
> > > +	struct hlist_node link;
> > > +};
> > > +
> > > +struct kvm_irq_routing_table {
> > > +	int chip[3][KVM_IOAPIC_NUM_PINS];
> > > +	struct kvm_kernel_irq_routing_entry *rt_entries;
> > > +	u32 max_gsi;
> > > +	struct hlist_head map[0];
> > >  };
> > >  
> > >  struct kvm {
> > > @@ -165,7 +172,7 @@ struct kvm {
> > >  #endif
> > >  
> > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > -	struct kvm_kernel_irq_routing_entry *irq_routing;
> > > +	struct kvm_irq_routing_table *irq_routing;
> > >  	spinlock_t irq_routing_lock;
> > >  	struct hlist_head mask_notifier_list;
> > >  	struct hlist_head irq_ack_notifier_list;
> > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > index c54a28b..da643d4 100644
> > > --- a/virt/kvm/irq_comm.c
> > > +++ b/virt/kvm/irq_comm.c
> > > @@ -125,6 +125,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > >  	struct kvm_kernel_irq_routing_entry *e;
> > >  	unsigned long *irq_state, sig_level;
> > >  	int ret = -1;
> > > +	struct kvm_irq_routing_table *irq_rt;
> > > +	struct hlist_node *n;
> > >  
> > >  	trace_kvm_set_irq(irq, level, irq_source_id);
> > >  
> > > @@ -147,14 +149,13 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > >  	 * writes to the unused one.
> > >  	 */
> > >  	rcu_read_lock();
> > > -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > -		if (e->gsi == irq) {
> > > -			int r = e->set(e, kvm, sig_level);
> > > -			if (r < 0)
> > > -				continue;
> > > +	irq_rt = rcu_dereference(kvm->irq_routing);
> > > +	hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
> > > +		int r = e->set(e, kvm, sig_level);
> > > +		if (r < 0)
> > > +			continue;
> > >  
> > > -			ret = r + ((ret < 0) ? 0 : ret);
> > > -		}
> > > +		ret = r + ((ret < 0) ? 0 : ret);
> > >  	}
> > >  	rcu_read_unlock();
> > >  	return ret;
> > > @@ -162,21 +163,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > >  
> > >  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > >  {
> > > -	struct kvm_kernel_irq_routing_entry *e;
> > >  	struct kvm_irq_ack_notifier *kian;
> > >  	struct hlist_node *n;
> > > -	unsigned gsi = pin;
> > > +	unsigned gsi;
> > >  
> > >  	trace_kvm_ack_irq(irqchip, pin);
> > >  
> > >  	rcu_read_lock();
> > > -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > -		if (e->irqchip.irqchip == irqchip &&
> > > -		    e->irqchip.pin == pin) {
> > > -			gsi = e->gsi;
> > > -			break;
> > > -		}
> > > -	}
> > > +	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
> > > +	if (gsi == -1)
> > > +		gsi = pin;
> > >  
> > >  	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> > >  		if (kian->gsi == gsi)
> > > @@ -277,7 +273,8 @@ void kvm_free_irq_routing(struct kvm *kvm)
> > >  	kfree(kvm->irq_routing);
> > >  }
> > >  
> > > -static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > > +static int setup_routing_entry(struct kvm_irq_routing_table *rt,
> > > +			       struct kvm_kernel_irq_routing_entry *e,
> > >  			       const struct kvm_irq_routing_entry *ue)
> > >  {
> > >  	int r = -EINVAL;
> > > @@ -303,6 +300,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > >  		}
> > >  		e->irqchip.irqchip = ue->u.irqchip.irqchip;
> > >  		e->irqchip.pin = ue->u.irqchip.pin + delta;
> > > +		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
> > >  		break;
> > >  	case KVM_IRQ_ROUTING_MSI:
> > >  		e->set = kvm_set_msi;
> > > @@ -313,6 +311,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > >  	default:
> > >  		goto out;
> > >  	}
> > > +
> > > +	hlist_add_head(&e->link, &rt->map[e->gsi]);
> > >  	r = 0;
> > >  out:
> > >  	return r;
> > > @@ -324,23 +324,37 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > >  			unsigned nr,
> > >  			unsigned flags)
> > >  {
> > > -	struct kvm_kernel_irq_routing_entry *new, *old;
> > > -	unsigned i;
> > > +	struct kvm_irq_routing_table *new, *old;
> > > +	u32 i, j, max_gsi = 0;
> > >  	int r;
> > >  
> > > -	/* last elemet is left zeored and indicates the end of the array */
> > > -	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > > +	for (i = 0; i < nr; ++i) {
> > > +		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
> > > +			return -EINVAL;
> > > +		max_gsi = max(max_gsi, ue[i].gsi);
> > > +	}
> > > +
> > > +	max_gsi += 1;
> > > +
> > > +	new = kzalloc(sizeof(*new) + (max_gsi * sizeof(struct hlist_head)) +
> > > +		      (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
> > > +		      GFP_KERNEL);
> > 
> > Why don't you allocate the hlist_head's and the routing entries
> > separately?
> > 
> I prefer it that way because cleanup after error is much easier. What
> are the disadvantages?

They are two data structures (two different arrays). Also, as mentioned
before by others, the allocation size of the irq_routing array might
become an issue.

> > >  
> > >  	if (!new)
> > >  		return -ENOMEM;
> > >  
> > > +	new->rt_entries = (void *)&new->map[max_gsi];
> > > +
> > > +	new->max_gsi = max_gsi;
> > > +	for (i = 0; i < 3; i++)
> > > +		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
> > > +			new->chip[i][j] = -1;
> > > +
> > 
> > Should use something else instead of 3. Maybe dynamic for multiple
> > IOAPIC's support (but you can argue thats another problem).
> > 
> This is (another problem). The code has 1 IOAPIC hardcoded pretty deeply
> even at user/kernel API level. We will solve is some day.

OK
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 16, 2009, 6:05 a.m. UTC | #6
On Wed, Jul 15, 2009 at 06:42:05PM -0300, Marcelo Tosatti wrote:
> On Wed, Jul 15, 2009 at 11:52:24PM +0300, Gleb Natapov wrote:
> > On Wed, Jul 15, 2009 at 03:18:00PM -0300, Marcelo Tosatti wrote:
> > > On Tue, Jul 14, 2009 at 05:30:45PM +0300, Gleb Natapov wrote:
> > > > Use gsi indexed array instead of scanning all entries on each interrupt
> > > > injection. Also maintain back mapping from irqchip/pin to gsi to speedup
> > > > interrupt acknowledgment notifications.
> > > > 
> > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > ---
> > > >  include/linux/kvm_host.h |   11 ++++++-
> > > >  virt/kvm/irq_comm.c      |   62 ++++++++++++++++++++++++++++-----------------
> > > >  2 files changed, 47 insertions(+), 26 deletions(-)
> > > > 
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index aa64d0d..ae6cbf1 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -128,7 +128,14 @@ struct kvm_kernel_irq_routing_entry {
> > > >  		} irqchip;
> > > >  		struct msi_msg msi;
> > > >  	};
> > > > -	struct list_head link;
> > > > +	struct hlist_node link;
> > > > +};
> > > > +
> > > > +struct kvm_irq_routing_table {
> > > > +	int chip[3][KVM_IOAPIC_NUM_PINS];
> > > > +	struct kvm_kernel_irq_routing_entry *rt_entries;
> > > > +	u32 max_gsi;
> > > > +	struct hlist_head map[0];
> > > >  };
> > > >  
> > > >  struct kvm {
> > > > @@ -165,7 +172,7 @@ struct kvm {
> > > >  #endif
> > > >  
> > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > -	struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > +	struct kvm_irq_routing_table *irq_routing;
> > > >  	spinlock_t irq_routing_lock;
> > > >  	struct hlist_head mask_notifier_list;
> > > >  	struct hlist_head irq_ack_notifier_list;
> > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > index c54a28b..da643d4 100644
> > > > --- a/virt/kvm/irq_comm.c
> > > > +++ b/virt/kvm/irq_comm.c
> > > > @@ -125,6 +125,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > >  	struct kvm_kernel_irq_routing_entry *e;
> > > >  	unsigned long *irq_state, sig_level;
> > > >  	int ret = -1;
> > > > +	struct kvm_irq_routing_table *irq_rt;
> > > > +	struct hlist_node *n;
> > > >  
> > > >  	trace_kvm_set_irq(irq, level, irq_source_id);
> > > >  
> > > > @@ -147,14 +149,13 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > >  	 * writes to the unused one.
> > > >  	 */
> > > >  	rcu_read_lock();
> > > > -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > -		if (e->gsi == irq) {
> > > > -			int r = e->set(e, kvm, sig_level);
> > > > -			if (r < 0)
> > > > -				continue;
> > > > +	irq_rt = rcu_dereference(kvm->irq_routing);
> > > > +	hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
> > > > +		int r = e->set(e, kvm, sig_level);
> > > > +		if (r < 0)
> > > > +			continue;
> > > >  
> > > > -			ret = r + ((ret < 0) ? 0 : ret);
> > > > -		}
> > > > +		ret = r + ((ret < 0) ? 0 : ret);
> > > >  	}
> > > >  	rcu_read_unlock();
> > > >  	return ret;
> > > > @@ -162,21 +163,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > >  
> > > >  void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > > >  {
> > > > -	struct kvm_kernel_irq_routing_entry *e;
> > > >  	struct kvm_irq_ack_notifier *kian;
> > > >  	struct hlist_node *n;
> > > > -	unsigned gsi = pin;
> > > > +	unsigned gsi;
> > > >  
> > > >  	trace_kvm_ack_irq(irqchip, pin);
> > > >  
> > > >  	rcu_read_lock();
> > > > -	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > -		if (e->irqchip.irqchip == irqchip &&
> > > > -		    e->irqchip.pin == pin) {
> > > > -			gsi = e->gsi;
> > > > -			break;
> > > > -		}
> > > > -	}
> > > > +	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
> > > > +	if (gsi == -1)
> > > > +		gsi = pin;
> > > >  
> > > >  	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> > > >  		if (kian->gsi == gsi)
> > > > @@ -277,7 +273,8 @@ void kvm_free_irq_routing(struct kvm *kvm)
> > > >  	kfree(kvm->irq_routing);
> > > >  }
> > > >  
> > > > -static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > > > +static int setup_routing_entry(struct kvm_irq_routing_table *rt,
> > > > +			       struct kvm_kernel_irq_routing_entry *e,
> > > >  			       const struct kvm_irq_routing_entry *ue)
> > > >  {
> > > >  	int r = -EINVAL;
> > > > @@ -303,6 +300,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > > >  		}
> > > >  		e->irqchip.irqchip = ue->u.irqchip.irqchip;
> > > >  		e->irqchip.pin = ue->u.irqchip.pin + delta;
> > > > +		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
> > > >  		break;
> > > >  	case KVM_IRQ_ROUTING_MSI:
> > > >  		e->set = kvm_set_msi;
> > > > @@ -313,6 +311,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > > >  	default:
> > > >  		goto out;
> > > >  	}
> > > > +
> > > > +	hlist_add_head(&e->link, &rt->map[e->gsi]);
> > > >  	r = 0;
> > > >  out:
> > > >  	return r;
> > > > @@ -324,23 +324,37 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > > >  			unsigned nr,
> > > >  			unsigned flags)
> > > >  {
> > > > -	struct kvm_kernel_irq_routing_entry *new, *old;
> > > > -	unsigned i;
> > > > +	struct kvm_irq_routing_table *new, *old;
> > > > +	u32 i, j, max_gsi = 0;
> > > >  	int r;
> > > >  
> > > > -	/* last elemet is left zeored and indicates the end of the array */
> > > > -	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > > > +	for (i = 0; i < nr; ++i) {
> > > > +		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
> > > > +			return -EINVAL;
> > > > +		max_gsi = max(max_gsi, ue[i].gsi);
> > > > +	}
> > > > +
> > > > +	max_gsi += 1;
> > > > +
> > > > +	new = kzalloc(sizeof(*new) + (max_gsi * sizeof(struct hlist_head)) +
> > > > +		      (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
> > > > +		      GFP_KERNEL);
> > > 
> > > Why don't you allocate the hlist_head's and the routing entries
> > > separately?
> > > 
> > I prefer it that way because cleanup after error is much easier. What
> > are the disadvantages?
> 
> They are two data structures (two different arrays). Also as mentioned
Logically it is one data structure that includes two arrays and some
other fields.

> before by others the allocation size of irq_routing array might become
> an issue.
> 
KVM_MAX_IRQ_ROUTES will be significantly reduced, so this will not be a
problem. I plan to reduce it to 128. Would it be OK to use vmalloc() if
the size is greater than one page?

> > > >  
> > > >  	if (!new)
> > > >  		return -ENOMEM;
> > > >  
> > > > +	new->rt_entries = (void *)&new->map[max_gsi];
> > > > +
> > > > +	new->max_gsi = max_gsi;
> > > > +	for (i = 0; i < 3; i++)
> > > > +		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
> > > > +			new->chip[i][j] = -1;
> > > > +
> > > 
> > > We should use something else instead of 3. Maybe make it dynamic to
> > > support multiple IOAPICs (but you can argue that's another problem).
> > > 
> > This is another problem. The code has 1 IOAPIC hardcoded pretty deeply,
> > even at the user/kernel API level. We will solve it some day.
> 
> OK

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index aa64d0d..ae6cbf1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -128,7 +128,14 @@  struct kvm_kernel_irq_routing_entry {
 		} irqchip;
 		struct msi_msg msi;
 	};
-	struct list_head link;
+	struct hlist_node link;
+};
+
+struct kvm_irq_routing_table {
+	int chip[3][KVM_IOAPIC_NUM_PINS];
+	struct kvm_kernel_irq_routing_entry *rt_entries;
+	u32 max_gsi;
+	struct hlist_head map[0];
 };
 
 struct kvm {
@@ -165,7 +172,7 @@  struct kvm {
 #endif
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-	struct kvm_kernel_irq_routing_entry *irq_routing;
+	struct kvm_irq_routing_table *irq_routing;
 	spinlock_t irq_routing_lock;
 	struct hlist_head mask_notifier_list;
 	struct hlist_head irq_ack_notifier_list;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index c54a28b..da643d4 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -125,6 +125,8 @@  int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	struct kvm_kernel_irq_routing_entry *e;
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
+	struct kvm_irq_routing_table *irq_rt;
+	struct hlist_node *n;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -147,14 +149,13 @@  int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	 * writes to the unused one.
 	 */
 	rcu_read_lock();
-	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
-		if (e->gsi == irq) {
-			int r = e->set(e, kvm, sig_level);
-			if (r < 0)
-				continue;
+	irq_rt = rcu_dereference(kvm->irq_routing);
+	hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
+		int r = e->set(e, kvm, sig_level);
+		if (r < 0)
+			continue;
 
-			ret = r + ((ret < 0) ? 0 : ret);
-		}
+		ret = r + ((ret < 0) ? 0 : ret);
 	}
 	rcu_read_unlock();
 	return ret;
@@ -162,21 +163,16 @@  int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_irq_ack_notifier *kian;
 	struct hlist_node *n;
-	unsigned gsi = pin;
+	unsigned gsi;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
 	rcu_read_lock();
-	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
-		if (e->irqchip.irqchip == irqchip &&
-		    e->irqchip.pin == pin) {
-			gsi = e->gsi;
-			break;
-		}
-	}
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi == -1)
+		gsi = pin;
 
 	hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
 		if (kian->gsi == gsi)
@@ -277,7 +273,8 @@  void kvm_free_irq_routing(struct kvm *kvm)
 	kfree(kvm->irq_routing);
 }
 
-static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+			       struct kvm_kernel_irq_routing_entry *e,
 			       const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -303,6 +300,7 @@  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 		}
 		e->irqchip.irqchip = ue->u.irqchip.irqchip;
 		e->irqchip.pin = ue->u.irqchip.pin + delta;
+		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
@@ -313,6 +311,8 @@  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 	default:
 		goto out;
 	}
+
+	hlist_add_head(&e->link, &rt->map[e->gsi]);
 	r = 0;
 out:
 	return r;
@@ -324,23 +324,37 @@  int kvm_set_irq_routing(struct kvm *kvm,
 			unsigned nr,
 			unsigned flags)
 {
-	struct kvm_kernel_irq_routing_entry *new, *old;
-	unsigned i;
+	struct kvm_irq_routing_table *new, *old;
+	u32 i, j, max_gsi = 0;
 	int r;
 
-	/* last elemet is left zeored and indicates the end of the array */
-	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
+	for (i = 0; i < nr; ++i) {
+		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
+			return -EINVAL;
+		max_gsi = max(max_gsi, ue[i].gsi);
+	}
+
+	max_gsi += 1;
+
+	new = kzalloc(sizeof(*new) + (max_gsi * sizeof(struct hlist_head)) +
+		      (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
+		      GFP_KERNEL);
 
 	if (!new)
 		return -ENOMEM;
 
+	new->rt_entries = (void *)&new->map[max_gsi];
+
+	new->max_gsi = max_gsi;
+	for (i = 0; i < 3; i++)
+		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
+			new->chip[i][j] = -1;
+
 	for (i = 0; i < nr; ++i) {
 		r = -EINVAL;
-		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
-			goto out;
 		if (ue->flags)
 			goto out;
-		r = setup_routing_entry(new + i, ue);
+		r = setup_routing_entry(new, &new->rt_entries[i], ue);
 		if (r)
 			goto out;
 		++ue;