diff mbox

[v5,4/4] KVM: x86: Add support for local interrupt requests from userspace

Message ID 1438039062-3168-4-git-send-email-srutherford@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Steve Rutherford July 27, 2015, 11:17 p.m. UTC
In order to enable userspace PIC support, the userspace PIC needs to
be able to inject local interrupts even when the APICs are in the
kernel.

KVM_INTERRUPT now supports sending local interrupts to an APIC when
APICs are in the kernel.

The ready_for_interrupt_request flag is now only set when the CPU/APIC
will immediately accept and inject an interrupt (i.e. APIC has not
masked the PIC).

When the PIC wishes to initiate an INTA cycle with, say, CPU0, it
kicks CPU0 out of the guest, and renedezvous with CPU0 once it arrives
in userspace.

When the CPU/APIC unmasks the PIC, a KVM_EXIT_IRQ_WINDOW_OPEN is
triggered, so that userspace has a chance to inject a PIC interrupt
if it had been pending.

Overall, this design can lead to a small number of spurious userspace
renedezvous. In particular, whenever the PIC transistions from low to
high while it is masked and whenever the PIC becomes unmasked while
it is low.

Note: this does not buffer more than one local interrupt in the
kernel, so the VMM needs to enter the guest in order to complete
interrupt injection before injecting an additional interrupt.

Compiles for x86.

Can pass the KVM Unit Tests.

Signed-off-by: Steve Rutherford <srutherford@google.com>
---
 Documentation/virtual/kvm/api.txt | 14 ++++++++++----
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/irq.c                | 38 +++++++++++++++++++++++++++++---------
 arch/x86/kvm/irq.h                |  9 +++++++++
 arch/x86/kvm/lapic.h              |  2 ++
 arch/x86/kvm/x86.c                | 34 +++++++++++++++++++++++++++++++---
 include/linux/kvm_host.h          |  1 +
 7 files changed, 83 insertions(+), 16 deletions(-)

Comments

Paolo Bonzini July 28, 2015, 3:58 p.m. UTC | #1
On 28/07/2015 01:17, Steve Rutherford wrote:
>  	return kvm->arch.vpic;
>  }
>  
> +static inline int pic_in_kernel(struct kvm *kvm)
> +{
> +	int ret;
> +
> +	ret = (pic_irqchip(kvm) != NULL);
> +	smp_rmb();

What does this memory barrier pair with?  I don't think it's necessary.

> +	return ret;
> +}
> +
>  static inline int irqchip_split(struct kvm *kvm)
>  {
>  	return kvm->arch.irqchip_split;


> @@ -5819,13 +5828,24 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
>  	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
>  	kvm_run->cr8 = kvm_get_cr8(vcpu);
>  	kvm_run->apic_base = kvm_get_apic_base(vcpu);
> -	if (irqchip_in_kernel(vcpu->kvm))
> +	if (irqchip_in_kernel(vcpu->kvm) && pic_in_kernel(vcpu->kvm))
>  		kvm_run->ready_for_interrupt_injection = 1;
> -	else
> +	else if (irqchip_in_kernel(vcpu->kvm)) {
> +		int ready_for_interrupt_injection =
> +				kvm_apic_accept_pic_intr(vcpu);
> +
> +		if (!kvm_run->ready_for_interrupt_injection &&
> +		    ready_for_interrupt_injection)
> +			kvm_make_request(KVM_REQ_PIC_UNMASK_EXIT, vcpu);
> +
> +		kvm_run->ready_for_interrupt_injection =
> +				ready_for_interrupt_injection;
> +	} else {
>  		kvm_run->ready_for_interrupt_injection =
>  			kvm_arch_interrupt_allowed(vcpu) &&
>  			!kvm_cpu_has_interrupt(vcpu) &&
>  			!kvm_event_needs_reinjection(vcpu);
> +	}
>  }
>  
>  static void update_cr8_intercept(struct kvm_vcpu *vcpu)

Why is this necessary?  Could it just set
kvm_run->ready_for_interrupt_injection as in the pic_in_kernel case?

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford July 28, 2015, 7:06 p.m. UTC | #2
On Tue, Jul 28, 2015 at 05:58:38PM +0200, Paolo Bonzini wrote:
> 
> 
> On 28/07/2015 01:17, Steve Rutherford wrote:
> >  	return kvm->arch.vpic;
> >  }
> >  
> > +static inline int pic_in_kernel(struct kvm *kvm)
> > +{
> > +	int ret;
> > +
> > +	ret = (pic_irqchip(kvm) != NULL);
> > +	smp_rmb();
> 
> What does this memory barrier pair with?  I don't think it's necessary.

To be honest, it's probably not necessary. I couldn't find why
irqchip_in_kernel (which this function is more or less a copy of)
needed it's memory barrier, so I cargo culted this one in.

> 
> > +	return ret;
> > +}
> > +
> >  static inline int irqchip_split(struct kvm *kvm)
> >  {
> >  	return kvm->arch.irqchip_split;
> 
> 
> > @@ -5819,13 +5828,24 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
> >  	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
> >  	kvm_run->cr8 = kvm_get_cr8(vcpu);
> >  	kvm_run->apic_base = kvm_get_apic_base(vcpu);
> > -	if (irqchip_in_kernel(vcpu->kvm))
> > +	if (irqchip_in_kernel(vcpu->kvm) && pic_in_kernel(vcpu->kvm))
> >  		kvm_run->ready_for_interrupt_injection = 1;
> > -	else
> > +	else if (irqchip_in_kernel(vcpu->kvm)) {
> > +		int ready_for_interrupt_injection =
> > +				kvm_apic_accept_pic_intr(vcpu);
> > +
> > +		if (!kvm_run->ready_for_interrupt_injection &&
> > +		    ready_for_interrupt_injection)
> > +			kvm_make_request(KVM_REQ_PIC_UNMASK_EXIT, vcpu);
> > +
> > +		kvm_run->ready_for_interrupt_injection =
> > +				ready_for_interrupt_injection;
> > +	} else {
> >  		kvm_run->ready_for_interrupt_injection =
> >  			kvm_arch_interrupt_allowed(vcpu) &&
> >  			!kvm_cpu_has_interrupt(vcpu) &&
> >  			!kvm_event_needs_reinjection(vcpu);
> > +	}
> >  }
> >  
> >  static void update_cr8_intercept(struct kvm_vcpu *vcpu)
> 
> Why is this necessary?  Could it just set
> kvm_run->ready_for_interrupt_injection as in the pic_in_kernel case?

The goal is to couple the interrupt ack cycle as closely as possible
with the injection of the local interrupt (which occur more or less
atomically on real hardware). The idea is to only ever attempt to
inject local interrupts when the CPU/APIC is ready to immediately
accept. 

If the CPU is ignoring the PIC, the interrupt acknowledge cycle should
not be performed, even if the PIC is high. This patch uses the
ready_for_interrupt_injection flag to let userspace whether or not the
cpu is paying attention to the PIC at the moment. 

When the PIC is high and the CPU transitions from ignoring the PIC to
paying attention to the PIC, it should (per real hardware)
immediately trigger an interrupt acknowledge cycle (which requires
bouncing up to userspace).

Steve
> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 29, 2015, 10:15 a.m. UTC | #3
On 28/07/2015 01:17, Steve Rutherford wrote:
> +
> +	if (irqchip_in_kernel(vcpu->kvm) && !pic_in_kernel(vcpu->kvm) &&
> +	    vcpu->arch.pending_external_vector == -1) {
> +		vcpu->arch.pending_external_vector = irq->irq;
> +		return 0;
> +	} else if (irqchip_in_kernel(vcpu->kvm) && !pic_in_kernel(vcpu->kvm))
> +		return -EEXIST;
> +	else if (irqchip_in_kernel(vcpu->kvm))
>  		return -ENXIO;

This is better written as:

	if (!irqchip_in_kernel(vcpu->kvm)) {
	        kvm_queue_interrupt(vcpu, irq->irq, false);
	        kvm_make_request(KVM_REQ_EVENT, vcpu);
		return 0;
	}

	/*
	 * With in-kernel LAPIC, we only use this to inject EXTINT, so
	 * fail for in-kernel 8259.
	 */
	if (pic_in_kernel(vcpu->kvm)) {
		return -ENXIO;

	if (vcpu->arch.pending_external_vector != -1)
		return -EEXIST;

	vcpu->arch.pending_external_vector = irq->irq;
	return 0;

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 29, 2015, 10:21 a.m. UTC | #4
On 28/07/2015 21:06, Steve Rutherford wrote:
>>> > > +static inline int pic_in_kernel(struct kvm *kvm)
>>> > > +{
>>> > > +	int ret;
>>> > > +
>>> > > +	ret = (pic_irqchip(kvm) != NULL);
>>> > > +	smp_rmb();
>> > 
>> > What does this memory barrier pair with?  I don't think it's necessary.
> To be honest, it's probably not necessary. I couldn't find why
> irqchip_in_kernel (which this function is more or less a copy of)
> needed it's memory barrier, so I cargo culted this one in.

It's for stuff like injecting an interrupt before any CPU is created,
while another thread is doing KVM_CREATE_IRQCHIP.  In your case a VCPU
has been created so you don't need it (the synchronization point is the
mutex_lock(&kvm->lock) in kvm_vm_ioctl_create_vcpu).

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 39e4c02..8f754d1 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -401,10 +401,9 @@  Capability: basic
 Architectures: x86, ppc, mips
 Type: vcpu ioctl
 Parameters: struct kvm_interrupt (in)
-Returns: 0 on success, -1 on error
+Returns: 0 on success, negative on failure.
 
-Queues a hardware interrupt vector to be injected.  This is only
-useful if in-kernel local APIC or equivalent is not used.
+Queues a hardware interrupt vector to be injected.
 
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
@@ -414,7 +413,14 @@  struct kvm_interrupt {
 
 X86:
 
-Note 'irq' is an interrupt vector, not an interrupt pin or line.
+Returns: 0 on success,
+	 -EEXIST if an interrupt is already enqueued
+	 -EINVAL the the irq number is invalid
+	 -ENXIO if the PIC is in the kernel
+	 -EFAULT if the pointer is invalid
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line. This
+ioctl is useful if the in-kernel PIC is not used.
 
 PPC:
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ebe7f07..b6508a3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -563,6 +563,7 @@  struct kvm_vcpu_arch {
 
 	u64 eoi_exit_bitmaps[4];
 	int pending_ioapic_eoi;
+	int pending_external_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index a1ec6a50..5fa0e6f 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,14 +38,27 @@  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+	return v->arch.pending_external_vector != -1;
+}
+
+/*
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-	if (kvm_apic_accept_pic_intr(v))
-		return pic_irqchip(v->kvm)->output;	/* PIC */
-	else
+	u8 accept = kvm_apic_accept_pic_intr(v);
+
+	if (accept) {
+		if (irqchip_split(v->kvm))
+			return pending_userspace_extint(v);
+		else
+			return pic_irqchip(v->kvm)->output;
+	} else
 		return 0;
 }
 
@@ -57,7 +70,7 @@  static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
-	if (!irqchip_in_kernel(v->kvm))
+	if (!pic_in_kernel(v->kvm))
 		return v->arch.interrupt.pending;
 
 	if (kvm_cpu_has_extint(v))
@@ -75,7 +88,7 @@  int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-	if (!irqchip_in_kernel(v->kvm))
+	if (!pic_in_kernel(v->kvm))
 		return v->arch.interrupt.pending;
 
 	if (kvm_cpu_has_extint(v))
@@ -91,9 +104,16 @@  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-	if (kvm_cpu_has_extint(v))
-		return kvm_pic_read_irq(v->kvm); /* PIC */
-	return -1;
+	if (kvm_cpu_has_extint(v)) {
+		if (irqchip_split(v->kvm)) {
+			int vector = v->arch.pending_external_vector;
+
+			v->arch.pending_external_vector = -1;
+			return vector;
+		} else
+			return kvm_pic_read_irq(v->kvm); /* PIC */
+	} else
+		return -1;
 }
 
 /*
@@ -103,7 +123,7 @@  int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
 	int vector;
 
-	if (!irqchip_in_kernel(v->kvm))
+	if (!pic_in_kernel(v->kvm) && v->arch.interrupt.pending)
 		return v->arch.interrupt.nr;
 
 	vector = kvm_cpu_get_extint(v);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2f13dd5..96a86a5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,6 +83,15 @@  static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 	return kvm->arch.vpic;
 }
 
+static inline int pic_in_kernel(struct kvm *kvm)
+{
+	int ret;
+
+	ret = (pic_irqchip(kvm) != NULL);
+	smp_rmb();
+	return ret;
+}
+
 static inline int irqchip_split(struct kvm *kvm)
 {
 	return kvm->arch.irqchip_split;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 7640379..6926430 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -64,6 +64,8 @@  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 		unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
+void kvm_request_pic_injection(struct kvm_vcpu *vcpu);
+
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eef562f..80cb387 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -65,6 +65,8 @@ 
 #include <asm/pvclock.h>
 #include <asm/div64.h>
 
+#define GET_VECTOR_FROM_USERSPACE 1
+
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
@@ -2675,7 +2677,14 @@  static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 {
 	if (irq->irq >= KVM_NR_INTERRUPTS)
 		return -EINVAL;
-	if (irqchip_in_kernel(vcpu->kvm))
+
+	if (irqchip_in_kernel(vcpu->kvm) && !pic_in_kernel(vcpu->kvm) &&
+	    vcpu->arch.pending_external_vector == -1) {
+		vcpu->arch.pending_external_vector = irq->irq;
+		return 0;
+	} else if (irqchip_in_kernel(vcpu->kvm) && !pic_in_kernel(vcpu->kvm))
+		return -EEXIST;
+	else if (irqchip_in_kernel(vcpu->kvm))
 		return -ENXIO;
 
 	kvm_queue_interrupt(vcpu, irq->irq, false);
@@ -5819,13 +5828,24 @@  static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (irqchip_in_kernel(vcpu->kvm) && pic_in_kernel(vcpu->kvm))
 		kvm_run->ready_for_interrupt_injection = 1;
-	else
+	else if (irqchip_in_kernel(vcpu->kvm)) {
+		int ready_for_interrupt_injection =
+				kvm_apic_accept_pic_intr(vcpu);
+
+		if (!kvm_run->ready_for_interrupt_injection &&
+		    ready_for_interrupt_injection)
+			kvm_make_request(KVM_REQ_PIC_UNMASK_EXIT, vcpu);
+
+		kvm_run->ready_for_interrupt_injection =
+				ready_for_interrupt_injection;
+	} else {
 		kvm_run->ready_for_interrupt_injection =
 			kvm_arch_interrupt_allowed(vcpu) &&
 			!kvm_cpu_has_interrupt(vcpu) &&
 			!kvm_event_needs_reinjection(vcpu);
+	}
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -6308,6 +6328,12 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 0;
 			goto out;
 		}
+		if (kvm_check_request(KVM_REQ_PIC_UNMASK_EXIT, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+			r = 0;
+			goto out;
+		}
+
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -7401,6 +7427,8 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
 
+	vcpu->arch.pending_external_vector = -1;
+
 	return 0;
 
 fail_free_mce_banks:
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 064067e..919a1be 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -141,6 +141,7 @@  static inline bool is_error_page(struct page *page)
 #define KVM_REQ_SMI               26
 #define KVM_REQ_HV_CRASH          27
 #define KVM_REQ_IOAPIC_EOI_EXIT   28
+#define KVM_REQ_PIC_UNMASK_EXIT   29
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1