@@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
which is the maximum number of possibly pending cpu-local interrupts.
+
5. The kvm_run structure
------------------------
@@ -3575,6 +3576,21 @@ struct {
KVM handlers should exit to userspace with rc = -EREMOTE.
+7.5 KVM_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. With this capability
+enabled, the userspace VMM is expected to emulate the IOAPIC and PIC.
+
+This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
+IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
8. Other capabilities.
----------------------
@@ -16,5 +16,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
smp_rmb();
return ret;
}
-
#endif
@@ -18,5 +18,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
{
return 1;
}
-
#endif
@@ -639,6 +639,8 @@ struct kvm_arch {
bool boot_vcpu_runs_old_kvmclock;
u64 disabled_quirks;
+
+ bool irqchip_split;
};
struct kvm_vm_stat {
@@ -35,6 +35,7 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
+#include "ioapic.h"
#include "irq.h"
#include "i8254.h"
#include "x86.h"
@@ -333,7 +334,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
s64 interval;
- if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
+ if (!irqchip_in_kernel(kvm) ||
+ !ioapic_in_kernel(kvm) ||
+ ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
return;
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@@ -98,6 +98,15 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
return kvm->arch.vioapic;
}
+static inline int ioapic_in_kernel(struct kvm *kvm)
+{
+ int ret;
+
+ ret = (ioapic_irqchip(kvm) != NULL);
+ smp_rmb();
+ return ret;
+}
+
static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
@@ -83,11 +83,17 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
return kvm->arch.vpic;
}
+static inline int irqchip_split(struct kvm *kvm)
+{
+ return kvm->arch.irqchip_split;
+}
+
static inline int irqchip_in_kernel(struct kvm *kvm)
{
int ret;
ret = (pic_irqchip(kvm) != NULL);
+ ret |= irqchip_split(kvm);
smp_rmb();
return ret;
}
@@ -208,7 +208,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
goto unlock;
}
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
- if (!irqchip_in_kernel(kvm))
+ if (!ioapic_in_kernel(kvm))
goto unlock;
kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
@@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
return kvm_set_irq_routing(kvm, default_routing,
ARRAY_SIZE(default_routing), 0);
}
+
+static const struct kvm_irq_routing_entry empty_routing[] = {};
+
+int kvm_setup_empty_irq_routing(struct kvm *kvm)
+{
+ return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
+}
@@ -209,7 +209,8 @@ out:
if (old)
kfree_rcu(old, rcu);
- kvm_vcpu_request_scan_ioapic(kvm);
+ if (!irqchip_split(kvm))
+ kvm_vcpu_request_scan_ioapic(kvm);
}
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
@@ -1827,7 +1828,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
apic_find_highest_isr(apic));
kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_rtc_eoi_tracking_restore_one(vcpu);
+ if (!irqchip_split(vcpu->kvm))
+ kvm_rtc_eoi_tracking_restore_one(vcpu);
}
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@@ -1910,7 +1912,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
/* Cache not set: could be safe but we don't bother. */
apic->highest_isr_cache == -1 ||
/* Need EOI to update ioapic. */
- kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
+ kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) ||
+ irqchip_split(vcpu->kvm)) {
/*
* PV EOI was disabled by apic_sync_pv_eoi_from_guest
* so we need not do anything here.
@@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void)
static inline bool vm_need_tpr_shadow(struct kvm *kvm)
{
- return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
+ return (cpu_has_vmx_tpr_shadow()) && irqchip_in_kernel(kvm);
}
static inline bool cpu_has_secondary_exec_ctrls(void)
@@ -9419,7 +9419,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
*/
- vm_entry_controls_init(vmx,
+ vm_entry_controls_init(vmx,
(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
~VM_ENTRY_IA32E_MODE) |
(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
@@ -2866,6 +2866,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_TSC_DEADLINE_TIMER:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_DISABLE_QUIRKS:
+ case KVM_CAP_SPLIT_IRQCHIP:
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
@@ -3926,6 +3927,23 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.disabled_quirks = cap->args[0];
r = 0;
break;
+ case KVM_CAP_SPLIT_IRQCHIP: {
+ mutex_lock(&kvm->lock);
+ r = -EEXIST;
+ if (irqchip_in_kernel(kvm))
+ goto split_irqchip_unlock;
+ r = -EINVAL;
+ if (atomic_read(&kvm->online_vcpus))
+ goto split_irqchip_unlock;
+ r = kvm_setup_empty_irq_routing(kvm);
+ if (r)
+ goto split_irqchip_unlock;
+ kvm->arch.irqchip_split = true;
+ r = 0;
+split_irqchip_unlock:
+ mutex_unlock(&kvm->lock);
+ break;
+ }
default:
r = -EINVAL;
break;
@@ -4044,7 +4062,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
r = -ENXIO;
- if (!irqchip_in_kernel(kvm))
+ if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
goto get_irqchip_out;
r = kvm_vm_ioctl_get_irqchip(kvm, chip);
if (r)
@@ -4068,7 +4086,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
r = -ENXIO;
- if (!irqchip_in_kernel(kvm))
+ if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
goto set_irqchip_out;
r = kvm_vm_ioctl_set_irqchip(kvm, chip);
if (r)
@@ -4194,6 +4212,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
break;
}
+
default:
r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
}
@@ -329,6 +329,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
+#define lapic_in_kernel(k) (irqchip_in_kernel(k))
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
#define vgic_ready(k) ((k)->arch.vgic.ready)
@@ -935,6 +935,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
#endif
int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
@@ -815,6 +815,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_IRQ_STATE 114
#define KVM_CAP_PPC_HWRNG 115
#define KVM_CAP_DISABLE_QUIRKS 116
+#define KVM_CAP_SPLIT_IRQCHIP 117
#ifdef KVM_CAP_IRQ_ROUTING
First patch in a series which enables the relocation of the PIC/IOAPIC to userspace. Adds capability KVM_CAP_SPLIT_IRQCHIP; KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the rest of the irqchip. Compile tested for x86. Signed-off-by: Steve Rutherford <srutherford@google.com> Suggested-by: Andrew Honig <ahonig@google.com> --- Documentation/virtual/kvm/api.txt | 16 ++++++++++++++++ arch/powerpc/kvm/irq.h | 1 - arch/s390/kvm/irq.h | 1 - arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/i8254.c | 5 ++++- arch/x86/kvm/ioapic.h | 9 +++++++++ arch/x86/kvm/irq.h | 6 ++++++ arch/x86/kvm/irq_comm.c | 9 ++++++++- arch/x86/kvm/lapic.c | 9 ++++++--- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 23 +++++++++++++++++++++-- include/kvm/arm_vgic.h | 1 + include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + 14 files changed, 77 insertions(+), 11 deletions(-)