@@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
which is the maximum number of possibly pending cpu-local interrupts.
+
5. The kvm_run structure
------------------------
@@ -3575,6 +3576,20 @@ struct {
KVM handlers should exit to userspace with rc = -EREMOTE.
+7.5 KVM_CAP_SPLIT_IRQCHIP
+
+Capability: KVM_CAP_SPLIT_IRQCHIP
+Architectures: x86
+Type: VM ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. This differs from
+KVM_CREATE_IRQCHIP in that it only creates the local apic; it creates neither
+the ioapic nor the pic in the kernel. It also enables in-kernel routing of
+interrupt requests. Fails if any VCPU has already been created, or if an
+irqchip is already in the kernel.
+
8. Other capabilities.
----------------------
@@ -17,4 +17,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return ret;
}
+static inline int lapic_in_kernel(struct kvm *kvm)
+{
+ return irqchip_in_kernel(kvm); /* no separate state here: simply forwards to irqchip_in_kernel() */
+}
+
#endif
@@ -19,4 +19,8 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return 1;
}
+static inline int lapic_in_kernel(struct kvm *kvm)
+{
+ return irqchip_in_kernel(kvm); /* no separate state here: simply forwards to irqchip_in_kernel() */
+}
#endif
@@ -638,6 +638,8 @@ struct kvm_arch {
bool boot_vcpu_runs_old_kvmclock;
u64 disabled_quirks;
+
+ bool irqchip_split;
};
struct kvm_vm_stat {
@@ -291,7 +291,7 @@ static int kvm_deassign_irq(struct kvm *kvm,
{
unsigned long guest_irq_type, host_irq_type;
- if (!irqchip_in_kernel(kvm))
+ if (!lapic_in_kernel(kvm))
return -EINVAL;
/* no irq assignment to deassign */
if (!assigned_dev->irq_requested_type)
@@ -568,7 +568,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *match;
unsigned long host_irq_type, guest_irq_type;
- if (!irqchip_in_kernel(kvm))
+ if (!lapic_in_kernel(kvm))
return r;
mutex_lock(&kvm->lock);
@@ -57,7 +57,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
*/
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
{
- if (!irqchip_in_kernel(v->kvm))
+ if (!lapic_in_kernel(v->kvm))
return v->arch.interrupt.pending;
if (kvm_cpu_has_extint(v))
@@ -75,7 +75,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
- if (!irqchip_in_kernel(v->kvm))
+ if (!lapic_in_kernel(v->kvm))
return v->arch.interrupt.pending;
if (kvm_cpu_has_extint(v))
@@ -103,7 +103,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
int vector;
- if (!irqchip_in_kernel(v->kvm))
+ if (!lapic_in_kernel(v->kvm))
return v->arch.interrupt.nr;
vector = kvm_cpu_get_extint(v);
@@ -92,6 +92,17 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return ret;
}
+static inline int irqchip_split(struct kvm *kvm)
+{
+ return kvm->arch.irqchip_split; /* per-VM flag, set to true when KVM_CAP_SPLIT_IRQCHIP is enabled */
+}
+
+static inline int lapic_in_kernel(struct kvm *kvm)
+{
+ return irqchip_split(kvm) || irqchip_in_kernel(kvm); /* nonzero in split mode or whenever irqchip_in_kernel() is nonzero */
+}
+
+
void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
@@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
return kvm_set_irq_routing(kvm, default_routing,
ARRAY_SIZE(default_routing), 0);
}
+
+static const struct kvm_irq_routing_entry empty_routing[] = {}; /* deliberately contains no entries */
+
+int kvm_setup_empty_irq_routing(struct kvm *kvm)
+{
+ return kvm_set_irq_routing(kvm, empty_routing, 0, 0); /* nr == 0: no routes are passed; result is returned unchanged */
+}
@@ -209,7 +209,8 @@ out:
if (old)
kfree_rcu(old, rcu);
- kvm_vcpu_request_scan_ioapic(kvm);
+ if (!irqchip_split(kvm))
+ kvm_vcpu_request_scan_ioapic(kvm);
}
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
@@ -1819,7 +1820,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
apic_find_highest_isr(apic));
kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_rtc_eoi_tracking_restore_one(vcpu);
+ if (!irqchip_split(vcpu->kvm))
+ kvm_rtc_eoi_tracking_restore_one(vcpu);
}
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@@ -1902,7 +1904,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
/* Cache not set: could be safe but we don't bother. */
apic->highest_isr_cache == -1 ||
/* Need EOI to update ioapic. */
- kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
+ kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) ||
+ irqchip_split(vcpu->kvm)) {
/*
* PV EOI was disabled by apic_sync_pv_eoi_from_guest
* so we need not do anything here.
@@ -1958,7 +1961,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 reg = (msr - APIC_BASE_MSR) << 4;
- if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+ if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
return 1;
if (reg == APIC_ICR2)
@@ -1975,7 +1978,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
- if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+ if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
return 1;
if (reg == APIC_DFR || reg == APIC_ICR2) {
@@ -3465,7 +3465,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
{
- if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+ if (unlikely(!lapic_in_kernel(vcpu->kvm) ||
kvm_event_needs_reinjection(vcpu)))
return false;
@@ -3054,7 +3054,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
/* instruction emulation calls kvm_set_cr8() */
r = cr_interception(svm);
- if (irqchip_in_kernel(svm->vcpu.kvm))
+ if (lapic_in_kernel(svm->vcpu.kvm))
return r;
if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
return r;
@@ -3295,7 +3295,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
* If the user space waits to inject interrupts, exit as soon as
* possible
*/
- if (!irqchip_in_kernel(svm->vcpu.kvm) &&
+ if (!lapic_in_kernel(svm->vcpu.kvm) &&
kvm_run->request_interrupt_window &&
!kvm_cpu_has_interrupt(&svm->vcpu)) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
@@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void)
static inline bool vm_need_tpr_shadow(struct kvm *kvm)
{
- return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
+ return (cpu_has_vmx_tpr_shadow()) && lapic_in_kernel(kvm);
}
static inline bool cpu_has_secondary_exec_ctrls(void)
@@ -1064,7 +1064,7 @@ static inline bool cpu_has_vmx_ple(void)
static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
{
- return flexpriority_enabled && irqchip_in_kernel(kvm);
+ return flexpriority_enabled && lapic_in_kernel(kvm);
}
static inline bool cpu_has_vmx_vpid(void)
@@ -4341,7 +4341,7 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
static int vmx_vm_has_apicv(struct kvm *kvm)
{
- return enable_apicv && irqchip_in_kernel(kvm);
+ return enable_apicv && lapic_in_kernel(kvm);
}
static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
@@ -5317,7 +5317,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
u8 cr8 = (u8)val;
err = kvm_set_cr8(vcpu, cr8);
kvm_complete_insn_gp(vcpu, err);
- if (irqchip_in_kernel(vcpu->kvm))
+ if (lapic_in_kernel(vcpu->kvm))
return 1;
if (cr8_prev <= cr8)
return 1;
@@ -5534,7 +5534,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
* If the user space waits to inject interrupts, exit as soon as
* possible
*/
- if (!irqchip_in_kernel(vcpu->kvm) &&
+ if (!lapic_in_kernel(vcpu->kvm) &&
vcpu->run->request_interrupt_window &&
!kvm_cpu_has_interrupt(vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
@@ -9419,7 +9419,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
*/
- vm_entry_controls_init(vmx,
+ vm_entry_controls_init(vmx,
(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
~VM_ENTRY_IA32E_MODE) |
(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
@@ -782,7 +782,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
if (cr8 & CR8_RESERVED_BITS)
return 1;
- if (irqchip_in_kernel(vcpu->kvm))
+ if (lapic_in_kernel(vcpu->kvm))
kvm_lapic_set_tpr(vcpu, cr8);
else
vcpu->arch.cr8 = cr8;
@@ -792,7 +792,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
- if (irqchip_in_kernel(vcpu->kvm))
+ if (lapic_in_kernel(vcpu->kvm))
return kvm_lapic_get_cr8(vcpu);
else
return vcpu->arch.cr8;
@@ -2800,6 +2800,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_TSC_DEADLINE_TIMER:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_DISABLE_QUIRKS:
+ case KVM_CAP_SPLIT_IRQCHIP:
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
@@ -3002,7 +3003,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
{
if (irq->irq >= KVM_NR_INTERRUPTS)
return -EINVAL;
- if (irqchip_in_kernel(vcpu->kvm))
+ if (lapic_in_kernel(vcpu->kvm))
return -ENXIO;
kvm_queue_interrupt(vcpu, irq->irq, false);
@@ -3480,7 +3481,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_vapic_addr va;
r = -EINVAL;
- if (!irqchip_in_kernel(vcpu->kvm))
+ if (!lapic_in_kernel(vcpu->kvm))
goto out;
r = -EFAULT;
if (copy_from_user(&va, argp, sizeof va))
@@ -3838,7 +3839,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
bool line_status)
{
- if (!irqchip_in_kernel(kvm))
+ if (!lapic_in_kernel(kvm))
return -ENXIO;
irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
@@ -3860,6 +3861,23 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.disabled_quirks = cap->args[0];
r = 0;
break;
+ case KVM_CAP_SPLIT_IRQCHIP: {
+ mutex_lock(&kvm->lock);
+ r = -EEXIST;
+ if (lapic_in_kernel(kvm))
+ goto split_irqchip_unlock;
+ r = -EINVAL;
+ if (atomic_read(&kvm->online_vcpus))
+ goto split_irqchip_unlock;
+ r = kvm_setup_empty_irq_routing(kvm);
+ if (r)
+ goto split_irqchip_unlock;
+ kvm->arch.irqchip_split = true;
+ r = 0;
+split_irqchip_unlock:
+ mutex_unlock(&kvm->lock);
+ break;
+ }
default:
r = -EINVAL;
break;
@@ -4128,6 +4146,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
break;
}
+
default:
r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
}
@@ -5893,7 +5912,7 @@ void kvm_arch_exit(void)
int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
- if (irqchip_in_kernel(vcpu->kvm)) {
+ if (lapic_in_kernel(vcpu->kvm)) {
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
return 1;
} else {
@@ -6060,7 +6079,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
*/
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
{
- return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
+ return (!lapic_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
vcpu->run->request_interrupt_window &&
kvm_arch_interrupt_allowed(vcpu));
}
@@ -6072,7 +6091,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
- if (irqchip_in_kernel(vcpu->kvm))
+ if (lapic_in_kernel(vcpu->kvm))
kvm_run->ready_for_interrupt_injection = 1;
else
kvm_run->ready_for_interrupt_injection =
@@ -6219,7 +6238,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
struct page *page = NULL;
- if (!irqchip_in_kernel(vcpu->kvm))
+ if (!lapic_in_kernel(vcpu->kvm))
return;
if (!kvm_x86_ops->set_apic_access_page_addr)
@@ -6255,7 +6274,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
- bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+ bool req_int_win = !lapic_in_kernel(vcpu->kvm) &&
vcpu->run->request_interrupt_window;
bool req_immediate_exit = false;
@@ -6644,7 +6663,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
/* re-sync apic's tpr */
- if (!irqchip_in_kernel(vcpu->kvm)) {
+ if (!lapic_in_kernel(vcpu->kvm)) {
if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
r = -EINVAL;
goto out;
@@ -7340,7 +7359,7 @@ void kvm_arch_check_processor_compat(void *rtn)
bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
{
- return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+ return lapic_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
}
struct static_key kvm_no_apic_vcpu __read_mostly;
@@ -7356,7 +7375,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.emulate_ctxt.ops = &emulate_ops;
- if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
+ if (!lapic_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7374,7 +7393,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (r < 0)
goto fail_free_pio_data;
- if (irqchip_in_kernel(kvm)) {
+ if (lapic_in_kernel(kvm)) {
r = kvm_create_lapic(vcpu);
if (r < 0)
goto fail_mmu_destroy;
@@ -7437,7 +7456,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
kvm_mmu_destroy(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
free_page((unsigned long)vcpu->arch.pio_data);
- if (!irqchip_in_kernel(vcpu->kvm))
+ if (!lapic_in_kernel(vcpu->kvm))
static_key_slow_dec(&kvm_no_apic_vcpu);
}
@@ -329,6 +329,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
+#define lapic_in_kernel(k) (irqchip_in_kernel(k)) /* same test as irqchip_in_kernel() in this header */
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
#define vgic_ready(k) ((k)->arch.vgic.ready)
@@ -934,6 +934,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
#endif
int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
@@ -815,6 +815,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_IRQ_STATE 114
#define KVM_CAP_PPC_HWRNG 115
#define KVM_CAP_DISABLE_QUIRKS 116
+#define KVM_CAP_SPLIT_IRQCHIP 117
#ifdef KVM_CAP_IRQ_ROUTING
@@ -73,7 +73,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
{
struct kvm_kernel_irq_routing_entry route;
- if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+ if (!lapic_in_kernel(kvm) || msi->flags != 0)
return -EINVAL;
route.msi.address_lo = msi->address_lo;
First patch in a series which enables the relocation of the
PIC/IOAPIC/PIT to userspace.

Adds capability KVM_CAP_SPLIT_IRQCHIP, which enables the construction
of LAPICs without the rest of the irqchip.

Compile tested for x86.

Signed-off-by: Steve Rutherford <srutherford@google.com>
Suggested-by: Andrew Honig <ahonig@google.com>
---
 Documentation/virtual/kvm/api.txt | 15 ++++++++++++
 arch/powerpc/kvm/irq.h            |  5 ++++
 arch/s390/kvm/irq.h               |  4 ++++
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/assigned-dev.c       |  4 ++--
 arch/x86/kvm/irq.c                |  6 ++---
 arch/x86/kvm/irq.h                | 11 +++++++++
 arch/x86/kvm/irq_comm.c           |  7 ++++++
 arch/x86/kvm/lapic.c              | 13 +++++++----
 arch/x86/kvm/mmu.c                |  2 +-
 arch/x86/kvm/svm.c                |  4 ++--
 arch/x86/kvm/vmx.c                | 12 +++++-----
 arch/x86/kvm/x86.c                | 49 +++++++++++++++++++++++++++------------
 include/kvm/arm_vgic.h            |  1 +
 include/linux/kvm_host.h          |  1 +
 include/uapi/linux/kvm.h          |  1 +
 virt/kvm/irqchip.c                |  2 +-
 17 files changed, 104 insertions(+), 35 deletions(-)