diff mbox

[2/2] qemu-kvm: broken MSI routing work-around

Message ID 20090722144122.GC7942@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michael S. Tsirkin July 22, 2009, 2:41 p.m. UTC
Kernels up to 2.6.31 have a bug: MSI entries where looked at when irq
is acked, address_lo was interpreted as irqchip, and
address_hi as pin.  If that matches a real interrupt this prevents
ack notifier from being processed.

Since these kernels ignore the value for address_hi when delivering
MSI, work around this by setting a value that never matches an
interrupt pin number.

Pointers to relevant kernel code, for reference: in kernel v2.6.31-rc3:
kvm_notify_acked_irq - fails to check irq type,
kvm_set_msi - ignored address_hi in message

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 qemu-kvm.c |   32 ++++++++++++++++++++++++++++++--
 1 files changed, 30 insertions(+), 2 deletions(-)

Comments

Marcelo Tosatti July 23, 2009, 7:59 p.m. UTC | #1
On Wed, Jul 22, 2009 at 05:41:22PM +0300, Michael S. Tsirkin wrote:
> Kernels up to 2.6.31 have a bug: MSI entries where looked at when irq
> is acked, address_lo was interpreted as irqchip, and
> address_hi as pin.  If that matches a real interrupt this prevents
> ack notifier from being processed.
> 
> Since these kernels ignore the value for address_hi when delivering
> MSI, work around this by setting a value that never matches an
> interrupt pin number.
> 
> Pointers to relevant kernel code, for reference: in kernel v2.6.31-rc3:
> kvm_notify_acked_irq - fails to check irq type,
> kvm_set_msi - ignored address_hi in message

Ugh, its still broken AFAICS? If so, what is the plan to fix it?

Also, you'd never want to use address_hi for something, in the
future?

> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  qemu-kvm.c |   32 ++++++++++++++++++++++++++++++--
>  1 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index cebaa65..ec6d583 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -38,6 +38,32 @@
>  #error libkvm: userspace and kernel version mismatch
>  #endif
>  
> +#ifdef KVM_CAP_IRQ_ROUTING
> +/* Broken MSI routing work-around:
> + * Kernels up to 2.6.31 have a bug: MSI entries where looked at when irq
> + * is acked, address_lo was interpreted as irqchip, and
> + * address_hi as pin.  If that matches a real interrupt this prevents
> + * ack notifier from being processed.
> + *
> + * Since these kernels ignore the value for address_hi when delivering
> + * MSI, work around this by setting a value that never matches an
> + * interrupt pin number.
> + */
> +#define KVM_BROKEN_MSI_ROUTING 1
> +static inline
> +void kvm_broken_msi_fix(struct kvm_irq_routing_entry *entry)
> +{
> +	if (entry->type == KVM_IRQ_ROUTING_MSI) {
> +		entry->u.msi.address_hi = 0xffffffff;
> +	}
> +}
> +static inline
> +unsigned kvm_broken_msi_address_hi(struct kvm_irq_routing_entry *entry)
> +{
> +	return 0xffffffff;
> +}
> +#endif
> +
>  int kvm_allowed = 1;
>  int kvm_irqchip = 1;
>  int kvm_pit = 1;
> @@ -1433,6 +1459,7 @@ int kvm_add_routing_entry(kvm_context_t kvm,
>  	new->type = entry->type;
>  	new->flags = entry->flags;
>  	new->u = entry->u;
> +	kvm_broken_msi_fix(new);
>  
>  	set_gsi(kvm, entry->gsi);
>  
> @@ -1489,7 +1516,7 @@ int kvm_del_routing_entry(kvm_context_t kvm,
>  				if (e->u.msi.address_lo ==
>  				    entry->u.msi.address_lo
>  				    && e->u.msi.address_hi ==
> -				    entry->u.msi.address_hi
> +				    kvm_broken_msi_address_hi(entry)
>  				    && e->u.msi.data == entry->u.msi.data) {
>  					p = &kvm->irq_routes->
>  					    entries[--kvm->irq_routes->nr];
> @@ -1550,9 +1577,10 @@ int kvm_update_routing_entry(kvm_context_t kvm,
>              break;
>          case KVM_IRQ_ROUTING_MSI:
>              if (e->u.msi.address_lo == entry->u.msi.address_lo &&
> -                e->u.msi.address_hi == entry->u.msi.address_hi &&
> +                e->u.msi.address_hi == kvm_broken_msi_address_hi(entry) &&
>                  e->u.msi.data == entry->u.msi.data) {
>                  memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
> +		kvm_broken_msi_fix(e);
>                  return 0;
>              }
>              break;
> -- 
> 1.6.2.5
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin July 23, 2009, 8:19 p.m. UTC | #2
On Thu, Jul 23, 2009 at 04:59:38PM -0300, Marcelo Tosatti wrote:
> On Wed, Jul 22, 2009 at 05:41:22PM +0300, Michael S. Tsirkin wrote:
> > Kernels up to 2.6.31 have a bug: MSI entries where looked at when irq
> > is acked, address_lo was interpreted as irqchip, and
> > address_hi as pin.  If that matches a real interrupt this prevents
> > ack notifier from being processed.
> > 
> > Since these kernels ignore the value for address_hi when delivering
> > MSI, work around this by setting a value that never matches an
> > interrupt pin number.
> > 
> > Pointers to relevant kernel code, for reference: in kernel v2.6.31-rc3:
> > kvm_notify_acked_irq - fails to check irq type,
> > kvm_set_msi - ignored address_hi in message
> 
> Ugh, its still broken AFAICS? If so, what is the plan to fix it?
> 

I'd like Avi's input on that.
One can imagine multiple ways to fix this in kernel.  One possible way
proposed by Gleb recently is switching to a new send_msi ioctl.
If we do it this way, we can just keep the old interface as broken.

> Also, you'd never want to use address_hi for something, in the
> future?

AFAIK apic always has 0 in address_hi. Maybe x2apic is different, we can
deactivate the workaround in userspace if we ever enable x2apic.

> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  qemu-kvm.c |   32 ++++++++++++++++++++++++++++++--
> >  1 files changed, 30 insertions(+), 2 deletions(-)
> > 
> > diff --git a/qemu-kvm.c b/qemu-kvm.c
> > index cebaa65..ec6d583 100644
> > --- a/qemu-kvm.c
> > +++ b/qemu-kvm.c
> > @@ -38,6 +38,32 @@
> >  #error libkvm: userspace and kernel version mismatch
> >  #endif
> >  
> > +#ifdef KVM_CAP_IRQ_ROUTING
> > +/* Broken MSI routing work-around:
> > + * Kernels up to 2.6.31 have a bug: MSI entries where looked at when irq
> > + * is acked, address_lo was interpreted as irqchip, and
> > + * address_hi as pin.  If that matches a real interrupt this prevents
> > + * ack notifier from being processed.
> > + *
> > + * Since these kernels ignore the value for address_hi when delivering
> > + * MSI, work around this by setting a value that never matches an
> > + * interrupt pin number.
> > + */
> > +#define KVM_BROKEN_MSI_ROUTING 1
> > +static inline
> > +void kvm_broken_msi_fix(struct kvm_irq_routing_entry *entry)
> > +{
> > +	if (entry->type == KVM_IRQ_ROUTING_MSI) {
> > +		entry->u.msi.address_hi = 0xffffffff;
> > +	}
> > +}
> > +static inline
> > +unsigned kvm_broken_msi_address_hi(struct kvm_irq_routing_entry *entry)
> > +{
> > +	return 0xffffffff;
> > +}
> > +#endif
> > +
> >  int kvm_allowed = 1;
> >  int kvm_irqchip = 1;
> >  int kvm_pit = 1;
> > @@ -1433,6 +1459,7 @@ int kvm_add_routing_entry(kvm_context_t kvm,
> >  	new->type = entry->type;
> >  	new->flags = entry->flags;
> >  	new->u = entry->u;
> > +	kvm_broken_msi_fix(new);
> >  
> >  	set_gsi(kvm, entry->gsi);
> >  
> > @@ -1489,7 +1516,7 @@ int kvm_del_routing_entry(kvm_context_t kvm,
> >  				if (e->u.msi.address_lo ==
> >  				    entry->u.msi.address_lo
> >  				    && e->u.msi.address_hi ==
> > -				    entry->u.msi.address_hi
> > +				    kvm_broken_msi_address_hi(entry)
> >  				    && e->u.msi.data == entry->u.msi.data) {
> >  					p = &kvm->irq_routes->
> >  					    entries[--kvm->irq_routes->nr];
> > @@ -1550,9 +1577,10 @@ int kvm_update_routing_entry(kvm_context_t kvm,
> >              break;
> >          case KVM_IRQ_ROUTING_MSI:
> >              if (e->u.msi.address_lo == entry->u.msi.address_lo &&
> > -                e->u.msi.address_hi == entry->u.msi.address_hi &&
> > +                e->u.msi.address_hi == kvm_broken_msi_address_hi(entry) &&
> >                  e->u.msi.data == entry->u.msi.data) {
> >                  memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
> > +		kvm_broken_msi_fix(e);
> >                  return 0;
> >              }
> >              break;
> > -- 
> > 1.6.2.5
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 24, 2009, 3:58 a.m. UTC | #3
On Thu, Jul 23, 2009 at 11:19:05PM +0300, Michael S. Tsirkin wrote:
> On Thu, Jul 23, 2009 at 04:59:38PM -0300, Marcelo Tosatti wrote:
> > On Wed, Jul 22, 2009 at 05:41:22PM +0300, Michael S. Tsirkin wrote:
> > > Kernels up to 2.6.31 have a bug: MSI entries where looked at when irq
> > > is acked, address_lo was interpreted as irqchip, and
> > > address_hi as pin.  If that matches a real interrupt this prevents
> > > ack notifier from being processed.
> > > 
> > > Since these kernels ignore the value for address_hi when delivering
> > > MSI, work around this by setting a value that never matches an
> > > interrupt pin number.
> > > 
> > > Pointers to relevant kernel code, for reference: in kernel v2.6.31-rc3:
> > > kvm_notify_acked_irq - fails to check irq type,
> > > kvm_set_msi - ignored address_hi in message
> > 
> > Ugh, its still broken AFAICS? If so, what is the plan to fix it?
> > 
> 
> I'd like Avi's input on that.
> One can imagine multiple ways to fix this in kernel.  One possible way
> proposed by Gleb recently is switching to a new send_msi ioctl.
> If we do it this way, we can just keep the old interface as broken.
> 
> > Also, you'd never want to use address_hi for something, in the
> > future?
> 
> AFAIK apic always has 0 in address_hi. Maybe x2apic is different, we can
> deactivate the workaround in userspace if we ever enable x2apic.
> 
x2apic doesn't use address_hi either without interrupt remapping.

> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >  qemu-kvm.c |   32 ++++++++++++++++++++++++++++++--
> > >  1 files changed, 30 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/qemu-kvm.c b/qemu-kvm.c
> > > index cebaa65..ec6d583 100644
> > > --- a/qemu-kvm.c
> > > +++ b/qemu-kvm.c
> > > @@ -38,6 +38,32 @@
> > >  #error libkvm: userspace and kernel version mismatch
> > >  #endif
> > >  
> > > +#ifdef KVM_CAP_IRQ_ROUTING
> > > +/* Broken MSI routing work-around:
> > > + * Kernels up to 2.6.31 have a bug: MSI entries where looked at when irq
> > > + * is acked, address_lo was interpreted as irqchip, and
> > > + * address_hi as pin.  If that matches a real interrupt this prevents
> > > + * ack notifier from being processed.
> > > + *
> > > + * Since these kernels ignore the value for address_hi when delivering
> > > + * MSI, work around this by setting a value that never matches an
> > > + * interrupt pin number.
> > > + */
> > > +#define KVM_BROKEN_MSI_ROUTING 1
> > > +static inline
> > > +void kvm_broken_msi_fix(struct kvm_irq_routing_entry *entry)
> > > +{
> > > +	if (entry->type == KVM_IRQ_ROUTING_MSI) {
> > > +		entry->u.msi.address_hi = 0xffffffff;
> > > +	}
> > > +}
> > > +static inline
> > > +unsigned kvm_broken_msi_address_hi(struct kvm_irq_routing_entry *entry)
> > > +{
> > > +	return 0xffffffff;
> > > +}
> > > +#endif
> > > +
> > >  int kvm_allowed = 1;
> > >  int kvm_irqchip = 1;
> > >  int kvm_pit = 1;
> > > @@ -1433,6 +1459,7 @@ int kvm_add_routing_entry(kvm_context_t kvm,
> > >  	new->type = entry->type;
> > >  	new->flags = entry->flags;
> > >  	new->u = entry->u;
> > > +	kvm_broken_msi_fix(new);
> > >  
> > >  	set_gsi(kvm, entry->gsi);
> > >  
> > > @@ -1489,7 +1516,7 @@ int kvm_del_routing_entry(kvm_context_t kvm,
> > >  				if (e->u.msi.address_lo ==
> > >  				    entry->u.msi.address_lo
> > >  				    && e->u.msi.address_hi ==
> > > -				    entry->u.msi.address_hi
> > > +				    kvm_broken_msi_address_hi(entry)
> > >  				    && e->u.msi.data == entry->u.msi.data) {
> > >  					p = &kvm->irq_routes->
> > >  					    entries[--kvm->irq_routes->nr];
> > > @@ -1550,9 +1577,10 @@ int kvm_update_routing_entry(kvm_context_t kvm,
> > >              break;
> > >          case KVM_IRQ_ROUTING_MSI:
> > >              if (e->u.msi.address_lo == entry->u.msi.address_lo &&
> > > -                e->u.msi.address_hi == entry->u.msi.address_hi &&
> > > +                e->u.msi.address_hi == kvm_broken_msi_address_hi(entry) &&
> > >                  e->u.msi.data == entry->u.msi.data) {
> > >                  memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
> > > +		kvm_broken_msi_fix(e);
> > >                  return 0;
> > >              }
> > >              break;
> > > -- 
> > > 1.6.2.5
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/qemu-kvm.c b/qemu-kvm.c
index cebaa65..ec6d583 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -38,6 +38,32 @@ 
 #error libkvm: userspace and kernel version mismatch
 #endif
 
+#ifdef KVM_CAP_IRQ_ROUTING
+/* Broken MSI routing work-around:
+ * Kernels up to 2.6.31 have a bug: MSI entries where looked at when irq
+ * is acked, address_lo was interpreted as irqchip, and
+ * address_hi as pin.  If that matches a real interrupt this prevents
+ * ack notifier from being processed.
+ *
+ * Since these kernels ignore the value for address_hi when delivering
+ * MSI, work around this by setting a value that never matches an
+ * interrupt pin number.
+ */
+#define KVM_BROKEN_MSI_ROUTING 1
+static inline
+void kvm_broken_msi_fix(struct kvm_irq_routing_entry *entry)
+{
+	if (entry->type == KVM_IRQ_ROUTING_MSI) {
+		entry->u.msi.address_hi = 0xffffffff;
+	}
+}
+static inline
+unsigned kvm_broken_msi_address_hi(struct kvm_irq_routing_entry *entry)
+{
+	return 0xffffffff;
+}
+#endif
+
 int kvm_allowed = 1;
 int kvm_irqchip = 1;
 int kvm_pit = 1;
@@ -1433,6 +1459,7 @@  int kvm_add_routing_entry(kvm_context_t kvm,
 	new->type = entry->type;
 	new->flags = entry->flags;
 	new->u = entry->u;
+	kvm_broken_msi_fix(new);
 
 	set_gsi(kvm, entry->gsi);
 
@@ -1489,7 +1516,7 @@  int kvm_del_routing_entry(kvm_context_t kvm,
 				if (e->u.msi.address_lo ==
 				    entry->u.msi.address_lo
 				    && e->u.msi.address_hi ==
-				    entry->u.msi.address_hi
+				    kvm_broken_msi_address_hi(entry)
 				    && e->u.msi.data == entry->u.msi.data) {
 					p = &kvm->irq_routes->
 					    entries[--kvm->irq_routes->nr];
@@ -1550,9 +1577,10 @@  int kvm_update_routing_entry(kvm_context_t kvm,
             break;
         case KVM_IRQ_ROUTING_MSI:
             if (e->u.msi.address_lo == entry->u.msi.address_lo &&
-                e->u.msi.address_hi == entry->u.msi.address_hi &&
+                e->u.msi.address_hi == kvm_broken_msi_address_hi(entry) &&
                 e->u.msi.data == entry->u.msi.data) {
                 memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
+		kvm_broken_msi_fix(e);
                 return 0;
             }
             break;