Message ID | 1433289107-20638-1-git-send-email-srutherford@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 03/06/2015 01:51, Steve Rutherford wrote: > First patch in a series which enables the relocation of the > PIC/IOAPIC to userspace. > > Adds capability KVM_CAP_SPLIT_IRQCHIP; > > KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the > rest of the irqchip. The documentation is not updated. Changing other arches is definitely a no-no, unfortunately. But there are so many s/irqchip_in_kernel/lapic_in_kernel/ changes here, that I wonder if you should just keep irqchip_in_kernel true in the "split irqchip" case. You are already testing irqchip_split in a few cases, and you can add ioapic_in_kernel whenever you need to test "lapic_in_kernel && !irqchip_split" at the same time. Paolo > Compile tested for x86. > > Signed-off-by: Steve Rutherford <srutherford@google.com> > Suggested-by: Andrew Honig <ahonig@google.com> > --- > Documentation/virtual/kvm/api.txt | 15 ++++++++++++ > arch/powerpc/kvm/irq.h | 5 ++++ > arch/s390/kvm/irq.h | 4 ++++ > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/kvm/assigned-dev.c | 4 ++-- > arch/x86/kvm/irq.c | 6 ++--- > arch/x86/kvm/irq.h | 11 +++++++++ > arch/x86/kvm/irq_comm.c | 7 ++++++ > arch/x86/kvm/lapic.c | 13 +++++++---- > arch/x86/kvm/mmu.c | 2 +- > arch/x86/kvm/svm.c | 4 ++-- > arch/x86/kvm/vmx.c | 12 +++++----- > arch/x86/kvm/x86.c | 49 +++++++++++++++++++++++++++------------ > include/kvm/arm_vgic.h | 1 + > include/linux/kvm_host.h | 1 + > include/uapi/linux/kvm.h | 1 + > virt/kvm/irqchip.c | 2 +- > 17 files changed, 104 insertions(+), 35 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index 6955444..9a43d42 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0 > and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), > which is the maximum number of possibly pending cpu-local interrupts. > > + > 5. 
The kvm_run structure > ------------------------ > > @@ -3575,6 +3576,20 @@ struct { > > KVM handlers should exit to userspace with rc = -EREMOTE. > > +7.5 KVM_SPLIT_IRQCHIP > + > +Capability: KVM_CAP_SPLIT_IRQCHIP > +Architectures: x86 > +Type: VM ioctl > +Parameters: None > +Returns: 0 on success, -1 on error > + > +Create a local apic for each processor in the kernel. This differs from > +KVM_CREATE_IRQCHIP in that it only creates the local apic; it creates neither > +the ioapic nor the pic in the kernel. Also, enables in kernel routing of > +interrupt requests. Fails if VCPU has already been created, or if the irqchip is > +already in the kernel. > + > > 8. Other capabilities. > ---------------------- > diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h > index 5a9a10b..5e6fa06 100644 > --- a/arch/powerpc/kvm/irq.h > +++ b/arch/powerpc/kvm/irq.h > @@ -17,4 +17,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm) > return ret; > } > > +static inline int lapic_in_kernel(struct kvm *kvm) > +{ > + return irqchip_in_kernel(kvm); > +} > + > #endif > diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h > index d98e415..db876c3 100644 > --- a/arch/s390/kvm/irq.h > +++ b/arch/s390/kvm/irq.h > @@ -19,4 +19,8 @@ static inline int irqchip_in_kernel(struct kvm *kvm) > return 1; > } > > +static inline int lapic_in_kernel(struct kvm *kvm) > +{ > + return irqchip_in_kernel(kvm); > +} > #endif > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 7276107..af3225a 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -639,6 +639,8 @@ struct kvm_arch { > bool boot_vcpu_runs_old_kvmclock; > > u64 disabled_quirks; > + > + bool irqchip_split; > }; > > struct kvm_vm_stat { > diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c > index d090ecf..1237e92 100644 > --- a/arch/x86/kvm/assigned-dev.c > +++ b/arch/x86/kvm/assigned-dev.c > @@ -291,7 +291,7 @@ static int 
kvm_deassign_irq(struct kvm *kvm, > { > unsigned long guest_irq_type, host_irq_type; > > - if (!irqchip_in_kernel(kvm)) > + if (!lapic_in_kernel(kvm)) > return -EINVAL; > /* no irq assignment to deassign */ > if (!assigned_dev->irq_requested_type) > @@ -568,7 +568,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, > struct kvm_assigned_dev_kernel *match; > unsigned long host_irq_type, guest_irq_type; > > - if (!irqchip_in_kernel(kvm)) > + if (!lapic_in_kernel(kvm)) > return r; > > mutex_lock(&kvm->lock); > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c > index a1ec6a50..706e47a 100644 > --- a/arch/x86/kvm/irq.c > +++ b/arch/x86/kvm/irq.c > @@ -57,7 +57,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) > */ > int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) > { > - if (!irqchip_in_kernel(v->kvm)) > + if (!lapic_in_kernel(v->kvm)) > return v->arch.interrupt.pending; > > if (kvm_cpu_has_extint(v)) > @@ -75,7 +75,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) > */ > int kvm_cpu_has_interrupt(struct kvm_vcpu *v) > { > - if (!irqchip_in_kernel(v->kvm)) > + if (!lapic_in_kernel(v->kvm)) > return v->arch.interrupt.pending; > > if (kvm_cpu_has_extint(v)) > @@ -103,7 +103,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) > { > int vector; > > - if (!irqchip_in_kernel(v->kvm)) > + if (!lapic_in_kernel(v->kvm)) > return v->arch.interrupt.nr; > > vector = kvm_cpu_get_extint(v); > diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h > index ad68c73..e46abf3 100644 > --- a/arch/x86/kvm/irq.h > +++ b/arch/x86/kvm/irq.h > @@ -92,6 +92,17 @@ static inline int irqchip_in_kernel(struct kvm *kvm) > return ret; > } > > +static inline int irqchip_split(struct kvm *kvm) > +{ > + return kvm->arch.irqchip_split; > +} > + > +static inline int lapic_in_kernel(struct kvm *kvm) > +{ > + return irqchip_split(kvm) || irqchip_in_kernel(kvm); > +} > + > + > void kvm_pic_reset(struct kvm_kpic_state *s); > > void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); > 
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > index 9efff9e..f43c59a 100644 > --- a/arch/x86/kvm/irq_comm.c > +++ b/arch/x86/kvm/irq_comm.c > @@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm) > return kvm_set_irq_routing(kvm, default_routing, > ARRAY_SIZE(default_routing), 0); > } > + > +static const struct kvm_irq_routing_entry empty_routing[] = {}; > + > +int kvm_setup_empty_irq_routing(struct kvm *kvm) > +{ > + return kvm_set_irq_routing(kvm, empty_routing, 0, 0); > +} > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index c789e00..92f4c98 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -209,7 +209,8 @@ out: > if (old) > kfree_rcu(old, rcu); > > - kvm_vcpu_request_scan_ioapic(kvm); > + if (!irqchip_split(kvm)) > + kvm_vcpu_request_scan_ioapic(kvm); > } > > static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) > @@ -1827,7 +1828,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, > kvm_x86_ops->hwapic_isr_update(vcpu->kvm, > apic_find_highest_isr(apic)); > kvm_make_request(KVM_REQ_EVENT, vcpu); > - kvm_rtc_eoi_tracking_restore_one(vcpu); > + if (!irqchip_split(vcpu->kvm)) > + kvm_rtc_eoi_tracking_restore_one(vcpu); > } > > void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) > @@ -1910,7 +1912,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, > /* Cache not set: could be safe but we don't bother. */ > apic->highest_isr_cache == -1 || > /* Need EOI to update ioapic. */ > - kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { > + kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) || > + irqchip_split(vcpu->kvm)) { > /* > * PV EOI was disabled by apic_sync_pv_eoi_from_guest > * so we need not do anything here. 
> @@ -1966,7 +1969,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) > struct kvm_lapic *apic = vcpu->arch.apic; > u32 reg = (msr - APIC_BASE_MSR) << 4; > > - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) > + if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) > return 1; > > if (reg == APIC_ICR2) > @@ -1983,7 +1986,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) > struct kvm_lapic *apic = vcpu->arch.apic; > u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; > > - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) > + if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) > return 1; > > if (reg == APIC_DFR || reg == APIC_ICR2) { > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index a65ce12..1513d14 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -3507,7 +3507,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) > > static bool can_do_async_pf(struct kvm_vcpu *vcpu) > { > - if (unlikely(!irqchip_in_kernel(vcpu->kvm) || > + if (unlikely(!lapic_in_kernel(vcpu->kvm) || > kvm_event_needs_reinjection(vcpu))) > return false; > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index b9f9e10..59166de 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -3054,7 +3054,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) > u8 cr8_prev = kvm_get_cr8(&svm->vcpu); > /* instruction emulation calls kvm_set_cr8() */ > r = cr_interception(svm); > - if (irqchip_in_kernel(svm->vcpu.kvm)) > + if (lapic_in_kernel(svm->vcpu.kvm)) > return r; > if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) > return r; > @@ -3295,7 +3295,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) > * If the user space waits to inject interrupts, exit as soon as > * possible > */ > - if (!irqchip_in_kernel(svm->vcpu.kvm) && > + if (!lapic_in_kernel(svm->vcpu.kvm) && > kvm_run->request_interrupt_window && > 
!kvm_cpu_has_interrupt(&svm->vcpu)) { > kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 9cf5030..3b58788 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void) > > static inline bool vm_need_tpr_shadow(struct kvm *kvm) > { > - return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); > + return (cpu_has_vmx_tpr_shadow()) && lapic_in_kernel(kvm); > } > > static inline bool cpu_has_secondary_exec_ctrls(void) > @@ -1064,7 +1064,7 @@ static inline bool cpu_has_vmx_ple(void) > > static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) > { > - return flexpriority_enabled && irqchip_in_kernel(kvm); > + return flexpriority_enabled && lapic_in_kernel(kvm); > } > > static inline bool cpu_has_vmx_vpid(void) > @@ -4341,7 +4341,7 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) > > static int vmx_vm_has_apicv(struct kvm *kvm) > { > - return enable_apicv && irqchip_in_kernel(kvm); > + return enable_apicv && lapic_in_kernel(kvm); > } > > static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) > @@ -5317,7 +5317,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) > u8 cr8 = (u8)val; > err = kvm_set_cr8(vcpu, cr8); > kvm_complete_insn_gp(vcpu, err); > - if (irqchip_in_kernel(vcpu->kvm)) > + if (lapic_in_kernel(vcpu->kvm)) > return 1; > if (cr8_prev <= cr8) > return 1; > @@ -5534,7 +5534,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) > * If the user space waits to inject interrupts, exit as soon as > * possible > */ > - if (!irqchip_in_kernel(vcpu->kvm) && > + if (!lapic_in_kernel(vcpu->kvm) && > vcpu->run->request_interrupt_window && > !kvm_cpu_has_interrupt(vcpu)) { > vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; > @@ -9419,7 +9419,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are > * 
emulated by vmx_set_efer(), below. > */ > - vm_entry_controls_init(vmx, > + vm_entry_controls_init(vmx, > (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & > ~VM_ENTRY_IA32E_MODE) | > (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 79dde16..19c8980 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -784,7 +784,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) > { > if (cr8 & CR8_RESERVED_BITS) > return 1; > - if (irqchip_in_kernel(vcpu->kvm)) > + if (lapic_in_kernel(vcpu->kvm)) > kvm_lapic_set_tpr(vcpu, cr8); > else > vcpu->arch.cr8 = cr8; > @@ -794,7 +794,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8); > > unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) > { > - if (irqchip_in_kernel(vcpu->kvm)) > + if (lapic_in_kernel(vcpu->kvm)) > return kvm_lapic_get_cr8(vcpu); > else > return vcpu->arch.cr8; > @@ -2866,6 +2866,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_TSC_DEADLINE_TIMER: > case KVM_CAP_ENABLE_CAP_VM: > case KVM_CAP_DISABLE_QUIRKS: > + case KVM_CAP_SPLIT_IRQCHIP: > #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT > case KVM_CAP_ASSIGN_DEV_IRQ: > case KVM_CAP_PCI_2_3: > @@ -3068,7 +3069,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, > { > if (irq->irq >= KVM_NR_INTERRUPTS) > return -EINVAL; > - if (irqchip_in_kernel(vcpu->kvm)) > + if (lapic_in_kernel(vcpu->kvm)) > return -ENXIO; > > kvm_queue_interrupt(vcpu, irq->irq, false); > @@ -3546,7 +3547,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, > struct kvm_vapic_addr va; > > r = -EINVAL; > - if (!irqchip_in_kernel(vcpu->kvm)) > + if (!lapic_in_kernel(vcpu->kvm)) > goto out; > r = -EFAULT; > if (copy_from_user(&va, argp, sizeof va)) > @@ -3904,7 +3905,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) > int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, > bool line_status) > { > - if (!irqchip_in_kernel(kvm)) > + if 
(!lapic_in_kernel(kvm)) > return -ENXIO; > > irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, > @@ -3926,6 +3927,23 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > kvm->arch.disabled_quirks = cap->args[0]; > r = 0; > break; > + case KVM_CAP_SPLIT_IRQCHIP: { > + mutex_lock(&kvm->lock); > + r = -EEXIST; > + if (lapic_in_kernel(kvm)) > + goto split_irqchip_unlock; > + r = -EINVAL; > + if (atomic_read(&kvm->online_vcpus)) > + goto split_irqchip_unlock; > + r = kvm_setup_empty_irq_routing(kvm); > + if (r) > + goto split_irqchip_unlock; > + kvm->arch.irqchip_split = true; > + r = 0; > +split_irqchip_unlock: > + mutex_unlock(&kvm->lock); > + break; > + } > default: > r = -EINVAL; > break; > @@ -4194,6 +4212,7 @@ long kvm_arch_vm_ioctl(struct file *filp, > r = kvm_vm_ioctl_enable_cap(kvm, &cap); > break; > } > + > default: > r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); > } > @@ -5959,7 +5978,7 @@ void kvm_arch_exit(void) > int kvm_vcpu_halt(struct kvm_vcpu *vcpu) > { > ++vcpu->stat.halt_exits; > - if (irqchip_in_kernel(vcpu->kvm)) { > + if (lapic_in_kernel(vcpu->kvm)) { > vcpu->arch.mp_state = KVM_MP_STATE_HALTED; > return 1; > } else { > @@ -6126,7 +6145,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) > */ > static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) > { > - return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && > + return (!lapic_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && > vcpu->run->request_interrupt_window && > kvm_arch_interrupt_allowed(vcpu)); > } > @@ -6138,7 +6157,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) > kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; > kvm_run->cr8 = kvm_get_cr8(vcpu); > kvm_run->apic_base = kvm_get_apic_base(vcpu); > - if (irqchip_in_kernel(vcpu->kvm)) > + if (lapic_in_kernel(vcpu->kvm)) > kvm_run->ready_for_interrupt_injection = 1; > else > kvm_run->ready_for_interrupt_injection = > @@ -6285,7 +6304,7 @@ 
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) > { > struct page *page = NULL; > > - if (!irqchip_in_kernel(vcpu->kvm)) > + if (!lapic_in_kernel(vcpu->kvm)) > return; > > if (!kvm_x86_ops->set_apic_access_page_addr) > @@ -6323,7 +6342,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, > static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > { > int r; > - bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && > + bool req_int_win = !lapic_in_kernel(vcpu->kvm) && > vcpu->run->request_interrupt_window; > bool req_immediate_exit = false; > > @@ -6712,7 +6731,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > } > > /* re-sync apic's tpr */ > - if (!irqchip_in_kernel(vcpu->kvm)) { > + if (!lapic_in_kernel(vcpu->kvm)) { > if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { > r = -EINVAL; > goto out; > @@ -7421,7 +7440,7 @@ void kvm_arch_check_processor_compat(void *rtn) > > bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) > { > - return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); > + return lapic_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); > } > > struct static_key kvm_no_apic_vcpu __read_mostly; > @@ -7437,7 +7456,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > vcpu->arch.pv.pv_unhalted = false; > vcpu->arch.emulate_ctxt.ops = &emulate_ops; > - if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) > + if (!lapic_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) > vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; > else > vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; > @@ -7455,7 +7474,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > if (r < 0) > goto fail_free_pio_data; > > - if (irqchip_in_kernel(kvm)) { > + if (lapic_in_kernel(kvm)) { > r = kvm_create_lapic(vcpu); > if (r < 0) > goto fail_mmu_destroy; > @@ -7518,7 +7537,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) > kvm_mmu_destroy(vcpu); > srcu_read_unlock(&vcpu->kvm->srcu, idx); > free_page((unsigned long)vcpu->arch.pio_data); 
> - if (!irqchip_in_kernel(vcpu->kvm)) > + if (!lapic_in_kernel(vcpu->kvm)) > static_key_slow_dec(&kvm_no_apic_vcpu); > } > > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > index 133ea00..ffe1f4e 100644 > --- a/include/kvm/arm_vgic.h > +++ b/include/kvm/arm_vgic.h > @@ -329,6 +329,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); > int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); > > #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) > +#define lapic_in_kernel(k) (irqchip_in_kernel(k)) > #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) > #define vgic_ready(k) ((k)->arch.vgic.ready) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index a8bcbc9..7e2b41a 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -935,6 +935,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) > #endif > > int kvm_setup_default_irq_routing(struct kvm *kvm); > +int kvm_setup_empty_irq_routing(struct kvm *kvm); > int kvm_set_irq_routing(struct kvm *kvm, > const struct kvm_irq_routing_entry *entries, > unsigned nr, > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 75bd9f7..1e6f6c3 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -815,6 +815,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_S390_IRQ_STATE 114 > #define KVM_CAP_PPC_HWRNG 115 > #define KVM_CAP_DISABLE_QUIRKS 116 > +#define KVM_CAP_SPLIT_IRQCHIP 117 > > #ifdef KVM_CAP_IRQ_ROUTING > > diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c > index 1d56a90..8aaceed 100644 > --- a/virt/kvm/irqchip.c > +++ b/virt/kvm/irqchip.c > @@ -73,7 +73,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) > { > struct kvm_kernel_irq_routing_entry route; > > - if (!irqchip_in_kernel(kvm) || msi->flags != 0) > + if (!lapic_in_kernel(kvm) || msi->flags != 0) > return -EINVAL; > > route.msi.address_lo = msi->address_lo; > -- To unsubscribe from this list: send the 
line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jun 03, 2015 at 10:54:41AM +0200, Paolo Bonzini wrote:
>
>
> On 03/06/2015 01:51, Steve Rutherford wrote:
> > First patch in a series which enables the relocation of the
> > PIC/IOAPIC to userspace.
> >
> > Adds capability KVM_CAP_SPLIT_IRQCHIP;
> >
> > KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
> > rest of the irqchip.
>
> The documentation is not updated.

Ack.

>
> Changing other arches is definitely a no-no, unfortunately. But there
> are so many s/irqchip_in_kernel/lapic_in_kernel/ changes here, that I
> wonder if you should just keep irqchip_in_kernel true in the "split
> irqchip" case. You are already testing irqchip_split in a few cases,
> and you can add ioapic_in_kernel whenever you need to test
> "lapic_in_kernel && !irqchip_split" at the same time.

From the perspective of avoiding impacting other architectures, this is a good idea, but the naming seems strange in the x86 case. Having irqchip_in_kernel be "true" when the ioapic/pic are in userspace seems strange. Admittedly, the irqchip isn't a "real" concept on x86, so inventing a new meaning is fine.

Despite my hesitation, I'll change the naming around.

Steve

>
> Paolo
>
> > Compile tested for x86.
> > > > Signed-off-by: Steve Rutherford <srutherford@google.com> > > Suggested-by: Andrew Honig <ahonig@google.com> > > --- > > Documentation/virtual/kvm/api.txt | 15 ++++++++++++ > > arch/powerpc/kvm/irq.h | 5 ++++ > > arch/s390/kvm/irq.h | 4 ++++ > > arch/x86/include/asm/kvm_host.h | 2 ++ > > arch/x86/kvm/assigned-dev.c | 4 ++-- > > arch/x86/kvm/irq.c | 6 ++--- > > arch/x86/kvm/irq.h | 11 +++++++++ > > arch/x86/kvm/irq_comm.c | 7 ++++++ > > arch/x86/kvm/lapic.c | 13 +++++++---- > > arch/x86/kvm/mmu.c | 2 +- > > arch/x86/kvm/svm.c | 4 ++-- > > arch/x86/kvm/vmx.c | 12 +++++----- > > arch/x86/kvm/x86.c | 49 +++++++++++++++++++++++++++------------ > > include/kvm/arm_vgic.h | 1 + > > include/linux/kvm_host.h | 1 + > > include/uapi/linux/kvm.h | 1 + > > virt/kvm/irqchip.c | 2 +- > > 17 files changed, 104 insertions(+), 35 deletions(-) > > > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > > index 6955444..9a43d42 100644 > > --- a/Documentation/virtual/kvm/api.txt > > +++ b/Documentation/virtual/kvm/api.txt > > @@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0 > > and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), > > which is the maximum number of possibly pending cpu-local interrupts. > > > > + > > 5. The kvm_run structure > > ------------------------ > > > > @@ -3575,6 +3576,20 @@ struct { > > > > KVM handlers should exit to userspace with rc = -EREMOTE. > > > > +7.5 KVM_SPLIT_IRQCHIP > > + > > +Capability: KVM_CAP_SPLIT_IRQCHIP > > +Architectures: x86 > > +Type: VM ioctl > > +Parameters: None > > +Returns: 0 on success, -1 on error > > + > > +Create a local apic for each processor in the kernel. This differs from > > +KVM_CREATE_IRQCHIP in that it only creates the local apic; it creates neither > > +the ioapic nor the pic in the kernel. Also, enables in kernel routing of > > +interrupt requests. 
Fails if VCPU has already been created, or if the irqchip is > > +already in the kernel. > > + > > > > 8. Other capabilities. > > ---------------------- > > diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h > > index 5a9a10b..5e6fa06 100644 > > --- a/arch/powerpc/kvm/irq.h > > +++ b/arch/powerpc/kvm/irq.h > > @@ -17,4 +17,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm) > > return ret; > > } > > > > +static inline int lapic_in_kernel(struct kvm *kvm) > > +{ > > + return irqchip_in_kernel(kvm); > > +} > > + > > #endif > > diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h > > index d98e415..db876c3 100644 > > --- a/arch/s390/kvm/irq.h > > +++ b/arch/s390/kvm/irq.h > > @@ -19,4 +19,8 @@ static inline int irqchip_in_kernel(struct kvm *kvm) > > return 1; > > } > > > > +static inline int lapic_in_kernel(struct kvm *kvm) > > +{ > > + return irqchip_in_kernel(kvm); > > +} > > #endif > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index 7276107..af3225a 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -639,6 +639,8 @@ struct kvm_arch { > > bool boot_vcpu_runs_old_kvmclock; > > > > u64 disabled_quirks; > > + > > + bool irqchip_split; > > }; > > > > struct kvm_vm_stat { > > diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c > > index d090ecf..1237e92 100644 > > --- a/arch/x86/kvm/assigned-dev.c > > +++ b/arch/x86/kvm/assigned-dev.c > > @@ -291,7 +291,7 @@ static int kvm_deassign_irq(struct kvm *kvm, > > { > > unsigned long guest_irq_type, host_irq_type; > > > > - if (!irqchip_in_kernel(kvm)) > > + if (!lapic_in_kernel(kvm)) > > return -EINVAL; > > /* no irq assignment to deassign */ > > if (!assigned_dev->irq_requested_type) > > @@ -568,7 +568,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, > > struct kvm_assigned_dev_kernel *match; > > unsigned long host_irq_type, guest_irq_type; > > > > - if (!irqchip_in_kernel(kvm)) > > + if 
(!lapic_in_kernel(kvm)) > > return r; > > > > mutex_lock(&kvm->lock); > > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c > > index a1ec6a50..706e47a 100644 > > --- a/arch/x86/kvm/irq.c > > +++ b/arch/x86/kvm/irq.c > > @@ -57,7 +57,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) > > */ > > int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) > > { > > - if (!irqchip_in_kernel(v->kvm)) > > + if (!lapic_in_kernel(v->kvm)) > > return v->arch.interrupt.pending; > > > > if (kvm_cpu_has_extint(v)) > > @@ -75,7 +75,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) > > */ > > int kvm_cpu_has_interrupt(struct kvm_vcpu *v) > > { > > - if (!irqchip_in_kernel(v->kvm)) > > + if (!lapic_in_kernel(v->kvm)) > > return v->arch.interrupt.pending; > > > > if (kvm_cpu_has_extint(v)) > > @@ -103,7 +103,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) > > { > > int vector; > > > > - if (!irqchip_in_kernel(v->kvm)) > > + if (!lapic_in_kernel(v->kvm)) > > return v->arch.interrupt.nr; > > > > vector = kvm_cpu_get_extint(v); > > diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h > > index ad68c73..e46abf3 100644 > > --- a/arch/x86/kvm/irq.h > > +++ b/arch/x86/kvm/irq.h > > @@ -92,6 +92,17 @@ static inline int irqchip_in_kernel(struct kvm *kvm) > > return ret; > > } > > > > +static inline int irqchip_split(struct kvm *kvm) > > +{ > > + return kvm->arch.irqchip_split; > > +} > > + > > +static inline int lapic_in_kernel(struct kvm *kvm) > > +{ > > + return irqchip_split(kvm) || irqchip_in_kernel(kvm); > > +} > > + > > + > > void kvm_pic_reset(struct kvm_kpic_state *s); > > > > void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); > > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > > index 9efff9e..f43c59a 100644 > > --- a/arch/x86/kvm/irq_comm.c > > +++ b/arch/x86/kvm/irq_comm.c > > @@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm) > > return kvm_set_irq_routing(kvm, default_routing, > > ARRAY_SIZE(default_routing), 0); > > } > 
> + > > +static const struct kvm_irq_routing_entry empty_routing[] = {}; > > + > > +int kvm_setup_empty_irq_routing(struct kvm *kvm) > > +{ > > + return kvm_set_irq_routing(kvm, empty_routing, 0, 0); > > +} > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > index c789e00..92f4c98 100644 > > --- a/arch/x86/kvm/lapic.c > > +++ b/arch/x86/kvm/lapic.c > > @@ -209,7 +209,8 @@ out: > > if (old) > > kfree_rcu(old, rcu); > > > > - kvm_vcpu_request_scan_ioapic(kvm); > > + if (!irqchip_split(kvm)) > > + kvm_vcpu_request_scan_ioapic(kvm); > > } > > > > static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) > > @@ -1827,7 +1828,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, > > kvm_x86_ops->hwapic_isr_update(vcpu->kvm, > > apic_find_highest_isr(apic)); > > kvm_make_request(KVM_REQ_EVENT, vcpu); > > - kvm_rtc_eoi_tracking_restore_one(vcpu); > > + if (!irqchip_split(vcpu->kvm)) > > + kvm_rtc_eoi_tracking_restore_one(vcpu); > > } > > > > void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) > > @@ -1910,7 +1912,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, > > /* Cache not set: could be safe but we don't bother. */ > > apic->highest_isr_cache == -1 || > > /* Need EOI to update ioapic. */ > > - kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { > > + kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) || > > + irqchip_split(vcpu->kvm)) { > > /* > > * PV EOI was disabled by apic_sync_pv_eoi_from_guest > > * so we need not do anything here. 
> > @@ -1966,7 +1969,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) > > struct kvm_lapic *apic = vcpu->arch.apic; > > u32 reg = (msr - APIC_BASE_MSR) << 4; > > > > - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) > > + if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) > > return 1; > > > > if (reg == APIC_ICR2) > > @@ -1983,7 +1986,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) > > struct kvm_lapic *apic = vcpu->arch.apic; > > u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; > > > > - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) > > + if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) > > return 1; > > > > if (reg == APIC_DFR || reg == APIC_ICR2) { > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > > index a65ce12..1513d14 100644 > > --- a/arch/x86/kvm/mmu.c > > +++ b/arch/x86/kvm/mmu.c > > @@ -3507,7 +3507,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) > > > > static bool can_do_async_pf(struct kvm_vcpu *vcpu) > > { > > - if (unlikely(!irqchip_in_kernel(vcpu->kvm) || > > + if (unlikely(!lapic_in_kernel(vcpu->kvm) || > > kvm_event_needs_reinjection(vcpu))) > > return false; > > > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > > index b9f9e10..59166de 100644 > > --- a/arch/x86/kvm/svm.c > > +++ b/arch/x86/kvm/svm.c > > @@ -3054,7 +3054,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) > > u8 cr8_prev = kvm_get_cr8(&svm->vcpu); > > /* instruction emulation calls kvm_set_cr8() */ > > r = cr_interception(svm); > > - if (irqchip_in_kernel(svm->vcpu.kvm)) > > + if (lapic_in_kernel(svm->vcpu.kvm)) > > return r; > > if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) > > return r; > > @@ -3295,7 +3295,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) > > * If the user space waits to inject interrupts, exit as soon as > > * possible > > */ > > - if (!irqchip_in_kernel(svm->vcpu.kvm) && > > + if 
(!lapic_in_kernel(svm->vcpu.kvm) && > > kvm_run->request_interrupt_window && > > !kvm_cpu_has_interrupt(&svm->vcpu)) { > > kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index 9cf5030..3b58788 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void) > > > > static inline bool vm_need_tpr_shadow(struct kvm *kvm) > > { > > - return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); > > + return (cpu_has_vmx_tpr_shadow()) && lapic_in_kernel(kvm); > > } > > > > static inline bool cpu_has_secondary_exec_ctrls(void) > > @@ -1064,7 +1064,7 @@ static inline bool cpu_has_vmx_ple(void) > > > > static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) > > { > > - return flexpriority_enabled && irqchip_in_kernel(kvm); > > + return flexpriority_enabled && lapic_in_kernel(kvm); > > } > > > > static inline bool cpu_has_vmx_vpid(void) > > @@ -4341,7 +4341,7 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) > > > > static int vmx_vm_has_apicv(struct kvm *kvm) > > { > > - return enable_apicv && irqchip_in_kernel(kvm); > > + return enable_apicv && lapic_in_kernel(kvm); > > } > > > > static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) > > @@ -5317,7 +5317,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) > > u8 cr8 = (u8)val; > > err = kvm_set_cr8(vcpu, cr8); > > kvm_complete_insn_gp(vcpu, err); > > - if (irqchip_in_kernel(vcpu->kvm)) > > + if (lapic_in_kernel(vcpu->kvm)) > > return 1; > > if (cr8_prev <= cr8) > > return 1; > > @@ -5534,7 +5534,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) > > * If the user space waits to inject interrupts, exit as soon as > > * possible > > */ > > - if (!irqchip_in_kernel(vcpu->kvm) && > > + if (!lapic_in_kernel(vcpu->kvm) && > > vcpu->run->request_interrupt_window && > > !kvm_cpu_has_interrupt(vcpu)) { > > vcpu->run->exit_reason = 
KVM_EXIT_IRQ_WINDOW_OPEN; > > @@ -9419,7 +9419,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > > /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are > > * emulated by vmx_set_efer(), below. > > */ > > - vm_entry_controls_init(vmx, > > + vm_entry_controls_init(vmx, > > (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & > > ~VM_ENTRY_IA32E_MODE) | > > (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 79dde16..19c8980 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -784,7 +784,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) > > { > > if (cr8 & CR8_RESERVED_BITS) > > return 1; > > - if (irqchip_in_kernel(vcpu->kvm)) > > + if (lapic_in_kernel(vcpu->kvm)) > > kvm_lapic_set_tpr(vcpu, cr8); > > else > > vcpu->arch.cr8 = cr8; > > @@ -794,7 +794,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8); > > > > unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) > > { > > - if (irqchip_in_kernel(vcpu->kvm)) > > + if (lapic_in_kernel(vcpu->kvm)) > > return kvm_lapic_get_cr8(vcpu); > > else > > return vcpu->arch.cr8; > > @@ -2866,6 +2866,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > > case KVM_CAP_TSC_DEADLINE_TIMER: > > case KVM_CAP_ENABLE_CAP_VM: > > case KVM_CAP_DISABLE_QUIRKS: > > + case KVM_CAP_SPLIT_IRQCHIP: > > #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT > > case KVM_CAP_ASSIGN_DEV_IRQ: > > case KVM_CAP_PCI_2_3: > > @@ -3068,7 +3069,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, > > { > > if (irq->irq >= KVM_NR_INTERRUPTS) > > return -EINVAL; > > - if (irqchip_in_kernel(vcpu->kvm)) > > + if (lapic_in_kernel(vcpu->kvm)) > > return -ENXIO; > > > > kvm_queue_interrupt(vcpu, irq->irq, false); > > @@ -3546,7 +3547,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, > > struct kvm_vapic_addr va; > > > > r = -EINVAL; > > - if (!irqchip_in_kernel(vcpu->kvm)) > > + if (!lapic_in_kernel(vcpu->kvm)) > > goto out; > > r = 
-EFAULT; > > if (copy_from_user(&va, argp, sizeof va)) > > @@ -3904,7 +3905,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) > > int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, > > bool line_status) > > { > > - if (!irqchip_in_kernel(kvm)) > > + if (!lapic_in_kernel(kvm)) > > return -ENXIO; > > > > irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, > > @@ -3926,6 +3927,23 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > > kvm->arch.disabled_quirks = cap->args[0]; > > r = 0; > > break; > > + case KVM_CAP_SPLIT_IRQCHIP: { > > + mutex_lock(&kvm->lock); > > + r = -EEXIST; > > + if (lapic_in_kernel(kvm)) > > + goto split_irqchip_unlock; > > + r = -EINVAL; > > + if (atomic_read(&kvm->online_vcpus)) > > + goto split_irqchip_unlock; > > + r = kvm_setup_empty_irq_routing(kvm); > > + if (r) > > + goto split_irqchip_unlock; > > + kvm->arch.irqchip_split = true; > > + r = 0; > > +split_irqchip_unlock: > > + mutex_unlock(&kvm->lock); > > + break; > > + } > > default: > > r = -EINVAL; > > break; > > @@ -4194,6 +4212,7 @@ long kvm_arch_vm_ioctl(struct file *filp, > > r = kvm_vm_ioctl_enable_cap(kvm, &cap); > > break; > > } > > + > > default: > > r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); > > } > > @@ -5959,7 +5978,7 @@ void kvm_arch_exit(void) > > int kvm_vcpu_halt(struct kvm_vcpu *vcpu) > > { > > ++vcpu->stat.halt_exits; > > - if (irqchip_in_kernel(vcpu->kvm)) { > > + if (lapic_in_kernel(vcpu->kvm)) { > > vcpu->arch.mp_state = KVM_MP_STATE_HALTED; > > return 1; > > } else { > > @@ -6126,7 +6145,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) > > */ > > static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) > > { > > - return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && > > + return (!lapic_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && > > vcpu->run->request_interrupt_window && > > kvm_arch_interrupt_allowed(vcpu)); > > } > > @@ 
-6138,7 +6157,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) > > kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; > > kvm_run->cr8 = kvm_get_cr8(vcpu); > > kvm_run->apic_base = kvm_get_apic_base(vcpu); > > - if (irqchip_in_kernel(vcpu->kvm)) > > + if (lapic_in_kernel(vcpu->kvm)) > > kvm_run->ready_for_interrupt_injection = 1; > > else > > kvm_run->ready_for_interrupt_injection = > > @@ -6285,7 +6304,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) > > { > > struct page *page = NULL; > > > > - if (!irqchip_in_kernel(vcpu->kvm)) > > + if (!lapic_in_kernel(vcpu->kvm)) > > return; > > > > if (!kvm_x86_ops->set_apic_access_page_addr) > > @@ -6323,7 +6342,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, > > static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > > { > > int r; > > - bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && > > + bool req_int_win = !lapic_in_kernel(vcpu->kvm) && > > vcpu->run->request_interrupt_window; > > bool req_immediate_exit = false; > > > > @@ -6712,7 +6731,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > > } > > > > /* re-sync apic's tpr */ > > - if (!irqchip_in_kernel(vcpu->kvm)) { > > + if (!lapic_in_kernel(vcpu->kvm)) { > > if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { > > r = -EINVAL; > > goto out; > > @@ -7421,7 +7440,7 @@ void kvm_arch_check_processor_compat(void *rtn) > > > > bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) > > { > > - return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); > > + return lapic_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); > > } > > > > struct static_key kvm_no_apic_vcpu __read_mostly; > > @@ -7437,7 +7456,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > > > vcpu->arch.pv.pv_unhalted = false; > > vcpu->arch.emulate_ctxt.ops = &emulate_ops; > > - if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) > > + if (!lapic_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) > > vcpu->arch.mp_state = 
KVM_MP_STATE_RUNNABLE; > > else > > vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; > > @@ -7455,7 +7474,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > if (r < 0) > > goto fail_free_pio_data; > > > > - if (irqchip_in_kernel(kvm)) { > > + if (lapic_in_kernel(kvm)) { > > r = kvm_create_lapic(vcpu); > > if (r < 0) > > goto fail_mmu_destroy; > > @@ -7518,7 +7537,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) > > kvm_mmu_destroy(vcpu); > > srcu_read_unlock(&vcpu->kvm->srcu, idx); > > free_page((unsigned long)vcpu->arch.pio_data); > > - if (!irqchip_in_kernel(vcpu->kvm)) > > + if (!lapic_in_kernel(vcpu->kvm)) > > static_key_slow_dec(&kvm_no_apic_vcpu); > > } > > > > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > > index 133ea00..ffe1f4e 100644 > > --- a/include/kvm/arm_vgic.h > > +++ b/include/kvm/arm_vgic.h > > @@ -329,6 +329,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); > > int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); > > > > #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) > > +#define lapic_in_kernel(k) (irqchip_in_kernel(k)) > > #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) > > #define vgic_ready(k) ((k)->arch.vgic.ready) > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > > index a8bcbc9..7e2b41a 100644 > > --- a/include/linux/kvm_host.h > > +++ b/include/linux/kvm_host.h > > @@ -935,6 +935,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) > > #endif > > > > int kvm_setup_default_irq_routing(struct kvm *kvm); > > +int kvm_setup_empty_irq_routing(struct kvm *kvm); > > int kvm_set_irq_routing(struct kvm *kvm, > > const struct kvm_irq_routing_entry *entries, > > unsigned nr, > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index 75bd9f7..1e6f6c3 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -815,6 +815,7 @@ struct kvm_ppc_smmu_info { > > #define KVM_CAP_S390_IRQ_STATE 114 > > 
#define KVM_CAP_PPC_HWRNG 115 > > #define KVM_CAP_DISABLE_QUIRKS 116 > > +#define KVM_CAP_SPLIT_IRQCHIP 117 > > > > #ifdef KVM_CAP_IRQ_ROUTING > > > > diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c > > index 1d56a90..8aaceed 100644 > > --- a/virt/kvm/irqchip.c > > +++ b/virt/kvm/irqchip.c > > @@ -73,7 +73,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) > > { > > struct kvm_kernel_irq_routing_entry route; > > > > - if (!irqchip_in_kernel(kvm) || msi->flags != 0) > > + if (!lapic_in_kernel(kvm) || msi->flags != 0) > > return -EINVAL; > > > > route.msi.address_lo = msi->address_lo; > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> From the perspective of avoiding impacting other architectures, this is a > good idea, but the naming seems strange in the x86 case. Having > irqchip_in_kernel be "true" when the ioapic/pic are in userspace seems > strange. Admittedly, the irqchip isn't a "real" concept on x86, so > inventing a new meaning is fine. From the KVM point of view, the "irqchip" is whatever delivers interrupts to the vCPU---which is the LAPIC for x86. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6955444..9a43d42 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), which is the maximum number of possibly pending cpu-local interrupts. + 5. The kvm_run structure ------------------------ @@ -3575,6 +3576,20 @@ struct { KVM handlers should exit to userspace with rc = -EREMOTE. +7.5 KVM_CAP_SPLIT_IRQCHIP + +Capability: KVM_CAP_SPLIT_IRQCHIP +Architectures: x86 +Type: VM ioctl +Parameters: None +Returns: 0 on success, -1 on error + +Create a local apic for each processor in the kernel. This differs from +KVM_CREATE_IRQCHIP in that it only creates the local apic; it creates neither +the ioapic nor the pic in the kernel. It also enables in-kernel routing of +interrupt requests. Fails if a VCPU has already been created, or if the irqchip is +already in the kernel. + 8. Other capabilities. 
---------------------- diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h index 5a9a10b..5e6fa06 100644 --- a/arch/powerpc/kvm/irq.h +++ b/arch/powerpc/kvm/irq.h @@ -17,4 +17,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return ret; } +static inline int lapic_in_kernel(struct kvm *kvm) +{ + return irqchip_in_kernel(kvm); +} + #endif diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h index d98e415..db876c3 100644 --- a/arch/s390/kvm/irq.h +++ b/arch/s390/kvm/irq.h @@ -19,4 +19,8 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return 1; } +static inline int lapic_in_kernel(struct kvm *kvm) +{ + return irqchip_in_kernel(kvm); +} #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7276107..af3225a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -639,6 +639,8 @@ struct kvm_arch { bool boot_vcpu_runs_old_kvmclock; u64 disabled_quirks; + + bool irqchip_split; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index d090ecf..1237e92 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -291,7 +291,7 @@ static int kvm_deassign_irq(struct kvm *kvm, { unsigned long guest_irq_type, host_irq_type; - if (!irqchip_in_kernel(kvm)) + if (!lapic_in_kernel(kvm)) return -EINVAL; /* no irq assignment to deassign */ if (!assigned_dev->irq_requested_type) @@ -568,7 +568,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *match; unsigned long host_irq_type, guest_irq_type; - if (!irqchip_in_kernel(kvm)) + if (!lapic_in_kernel(kvm)) return r; mutex_lock(&kvm->lock); diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index a1ec6a50..706e47a 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -57,7 +57,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) */ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) { - if (!irqchip_in_kernel(v->kvm)) + if 
(!lapic_in_kernel(v->kvm)) return v->arch.interrupt.pending; if (kvm_cpu_has_extint(v)) @@ -75,7 +75,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - if (!irqchip_in_kernel(v->kvm)) + if (!lapic_in_kernel(v->kvm)) return v->arch.interrupt.pending; if (kvm_cpu_has_extint(v)) @@ -103,7 +103,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { int vector; - if (!irqchip_in_kernel(v->kvm)) + if (!lapic_in_kernel(v->kvm)) return v->arch.interrupt.nr; vector = kvm_cpu_get_extint(v); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ad68c73..e46abf3 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -92,6 +92,17 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return ret; } +static inline int irqchip_split(struct kvm *kvm) +{ + return kvm->arch.irqchip_split; +} + +static inline int lapic_in_kernel(struct kvm *kvm) +{ + return irqchip_split(kvm) || irqchip_in_kernel(kvm); +} + + void kvm_pic_reset(struct kvm_kpic_state *s); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 9efff9e..f43c59a 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm) return kvm_set_irq_routing(kvm, default_routing, ARRAY_SIZE(default_routing), 0); } + +static const struct kvm_irq_routing_entry empty_routing[] = {}; + +int kvm_setup_empty_irq_routing(struct kvm *kvm) +{ + return kvm_set_irq_routing(kvm, empty_routing, 0, 0); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c789e00..92f4c98 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -209,7 +209,8 @@ out: if (old) kfree_rcu(old, rcu); - kvm_vcpu_request_scan_ioapic(kvm); + if (!irqchip_split(kvm)) + kvm_vcpu_request_scan_ioapic(kvm); } static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) @@ -1827,7 +1828,8 @@ void kvm_apic_post_state_restore(struct 
kvm_vcpu *vcpu, kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_rtc_eoi_tracking_restore_one(vcpu); + if (!irqchip_split(vcpu->kvm)) + kvm_rtc_eoi_tracking_restore_one(vcpu); } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) @@ -1910,7 +1912,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, /* Cache not set: could be safe but we don't bother. */ apic->highest_isr_cache == -1 || /* Need EOI to update ioapic. */ - kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { + kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) || + irqchip_split(vcpu->kvm)) { /* * PV EOI was disabled by apic_sync_pv_eoi_from_guest * so we need not do anything here. @@ -1966,7 +1969,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) struct kvm_lapic *apic = vcpu->arch.apic; u32 reg = (msr - APIC_BASE_MSR) << 4; - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) + if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) return 1; if (reg == APIC_ICR2) @@ -1983,7 +1986,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) struct kvm_lapic *apic = vcpu->arch.apic; u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) + if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) return 1; if (reg == APIC_DFR || reg == APIC_ICR2) { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a65ce12..1513d14 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3507,7 +3507,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) static bool can_do_async_pf(struct kvm_vcpu *vcpu) { - if (unlikely(!irqchip_in_kernel(vcpu->kvm) || + if (unlikely(!lapic_in_kernel(vcpu->kvm) || kvm_event_needs_reinjection(vcpu))) return false; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b9f9e10..59166de 100644 --- a/arch/x86/kvm/svm.c +++ 
b/arch/x86/kvm/svm.c @@ -3054,7 +3054,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(svm); - if (irqchip_in_kernel(svm->vcpu.kvm)) + if (lapic_in_kernel(svm->vcpu.kvm)) return r; if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) return r; @@ -3295,7 +3295,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) * If the user space waits to inject interrupts, exit as soon as * possible */ - if (!irqchip_in_kernel(svm->vcpu.kvm) && + if (!lapic_in_kernel(svm->vcpu.kvm) && kvm_run->request_interrupt_window && !kvm_cpu_has_interrupt(&svm->vcpu)) { kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9cf5030..3b58788 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void) static inline bool vm_need_tpr_shadow(struct kvm *kvm) { - return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); + return (cpu_has_vmx_tpr_shadow()) && lapic_in_kernel(kvm); } static inline bool cpu_has_secondary_exec_ctrls(void) @@ -1064,7 +1064,7 @@ static inline bool cpu_has_vmx_ple(void) static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) { - return flexpriority_enabled && irqchip_in_kernel(kvm); + return flexpriority_enabled && lapic_in_kernel(kvm); } static inline bool cpu_has_vmx_vpid(void) @@ -4341,7 +4341,7 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) static int vmx_vm_has_apicv(struct kvm *kvm) { - return enable_apicv && irqchip_in_kernel(kvm); + return enable_apicv && lapic_in_kernel(kvm); } static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) @@ -5317,7 +5317,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) u8 cr8 = (u8)val; err = kvm_set_cr8(vcpu, cr8); kvm_complete_insn_gp(vcpu, err); - if (irqchip_in_kernel(vcpu->kvm)) + if (lapic_in_kernel(vcpu->kvm)) return 1; if (cr8_prev <= cr8) 
return 1; @@ -5534,7 +5534,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) * If the user space waits to inject interrupts, exit as soon as * possible */ - if (!irqchip_in_kernel(vcpu->kvm) && + if (!lapic_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window && !kvm_cpu_has_interrupt(vcpu)) { vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; @@ -9419,7 +9419,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are * emulated by vmx_set_efer(), below. */ - vm_entry_controls_init(vmx, + vm_entry_controls_init(vmx, (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & ~VM_ENTRY_IA32E_MODE) | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 79dde16..19c8980 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -784,7 +784,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if (cr8 & CR8_RESERVED_BITS) return 1; - if (irqchip_in_kernel(vcpu->kvm)) + if (lapic_in_kernel(vcpu->kvm)) kvm_lapic_set_tpr(vcpu, cr8); else vcpu->arch.cr8 = cr8; @@ -794,7 +794,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) { - if (irqchip_in_kernel(vcpu->kvm)) + if (lapic_in_kernel(vcpu->kvm)) return kvm_lapic_get_cr8(vcpu); else return vcpu->arch.cr8; @@ -2866,6 +2866,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_TSC_DEADLINE_TIMER: case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_DISABLE_QUIRKS: + case KVM_CAP_SPLIT_IRQCHIP: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -3068,7 +3069,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, { if (irq->irq >= KVM_NR_INTERRUPTS) return -EINVAL; - if (irqchip_in_kernel(vcpu->kvm)) + if (lapic_in_kernel(vcpu->kvm)) return -ENXIO; kvm_queue_interrupt(vcpu, irq->irq, false); @@ -3546,7 +3547,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct 
kvm_vapic_addr va; r = -EINVAL; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!lapic_in_kernel(vcpu->kvm)) goto out; r = -EFAULT; if (copy_from_user(&va, argp, sizeof va)) @@ -3904,7 +3905,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, bool line_status) { - if (!irqchip_in_kernel(kvm)) + if (!lapic_in_kernel(kvm)) return -ENXIO; irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, @@ -3926,6 +3927,23 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.disabled_quirks = cap->args[0]; r = 0; break; + case KVM_CAP_SPLIT_IRQCHIP: { + mutex_lock(&kvm->lock); + r = -EEXIST; + if (lapic_in_kernel(kvm)) + goto split_irqchip_unlock; + r = -EINVAL; + if (atomic_read(&kvm->online_vcpus)) + goto split_irqchip_unlock; + r = kvm_setup_empty_irq_routing(kvm); + if (r) + goto split_irqchip_unlock; + kvm->arch.irqchip_split = true; + r = 0; +split_irqchip_unlock: + mutex_unlock(&kvm->lock); + break; + } default: r = -EINVAL; break; @@ -4194,6 +4212,7 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_enable_cap(kvm, &cap); break; } + default: r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } @@ -5959,7 +5978,7 @@ void kvm_arch_exit(void) int kvm_vcpu_halt(struct kvm_vcpu *vcpu) { ++vcpu->stat.halt_exits; - if (irqchip_in_kernel(vcpu->kvm)) { + if (lapic_in_kernel(vcpu->kvm)) { vcpu->arch.mp_state = KVM_MP_STATE_HALTED; return 1; } else { @@ -6126,7 +6145,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) */ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) { - return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && + return (!lapic_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && vcpu->run->request_interrupt_window && kvm_arch_interrupt_allowed(vcpu)); } @@ -6138,7 +6157,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->if_flag = (kvm_get_rflags(vcpu) & 
X86_EFLAGS_IF) != 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); - if (irqchip_in_kernel(vcpu->kvm)) + if (lapic_in_kernel(vcpu->kvm)) kvm_run->ready_for_interrupt_injection = 1; else kvm_run->ready_for_interrupt_injection = @@ -6285,7 +6304,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { struct page *page = NULL; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!lapic_in_kernel(vcpu->kvm)) return; if (!kvm_x86_ops->set_apic_access_page_addr) @@ -6323,7 +6342,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; - bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && + bool req_int_win = !lapic_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window; bool req_immediate_exit = false; @@ -6712,7 +6731,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } /* re-sync apic's tpr */ - if (!irqchip_in_kernel(vcpu->kvm)) { + if (!lapic_in_kernel(vcpu->kvm)) { if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { r = -EINVAL; goto out; @@ -7421,7 +7440,7 @@ void kvm_arch_check_processor_compat(void *rtn) bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { - return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); + return lapic_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); } struct static_key kvm_no_apic_vcpu __read_mostly; @@ -7437,7 +7456,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; - if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) + if (!lapic_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; @@ -7455,7 +7474,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) if (r < 0) goto fail_free_pio_data; - if (irqchip_in_kernel(kvm)) { + if (lapic_in_kernel(kvm)) { r = kvm_create_lapic(vcpu); if (r < 0) goto fail_mmu_destroy; @@ -7518,7 
+7537,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvm_mmu_destroy(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, idx); free_page((unsigned long)vcpu->arch.pio_data); - if (!irqchip_in_kernel(vcpu->kvm)) + if (!lapic_in_kernel(vcpu->kvm)) static_key_slow_dec(&kvm_no_apic_vcpu); } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 133ea00..ffe1f4e 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -329,6 +329,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) +#define lapic_in_kernel(k) (irqchip_in_kernel(k)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) #define vgic_ready(k) ((k)->arch.vgic.ready) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a8bcbc9..7e2b41a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -935,6 +935,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #endif int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_setup_empty_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 75bd9f7..1e6f6c3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -815,6 +815,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_IRQ_STATE 114 #define KVM_CAP_PPC_HWRNG 115 #define KVM_CAP_DISABLE_QUIRKS 116 +#define KVM_CAP_SPLIT_IRQCHIP 117 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 1d56a90..8aaceed 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -73,7 +73,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; - if (!irqchip_in_kernel(kvm) || msi->flags != 0) + if (!lapic_in_kernel(kvm) || msi->flags != 0) return -EINVAL; route.msi.address_lo 
= msi->address_lo;
First patch in a series which enables the relocation of the PIC/IOAPIC to userspace. Adds capability KVM_CAP_SPLIT_IRQCHIP. KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the rest of the irqchip. Compile tested for x86. Signed-off-by: Steve Rutherford <srutherford@google.com> Suggested-by: Andrew Honig <ahonig@google.com> --- Documentation/virtual/kvm/api.txt | 15 ++++++++++++ arch/powerpc/kvm/irq.h | 5 ++++ arch/s390/kvm/irq.h | 4 ++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/assigned-dev.c | 4 ++-- arch/x86/kvm/irq.c | 6 ++--- arch/x86/kvm/irq.h | 11 +++++++++ arch/x86/kvm/irq_comm.c | 7 ++++++ arch/x86/kvm/lapic.c | 13 +++++++---- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 12 +++++----- arch/x86/kvm/x86.c | 49 +++++++++++++++++++++++++++------------ include/kvm/arm_vgic.h | 1 + include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + virt/kvm/irqchip.c | 2 +- 17 files changed, 104 insertions(+), 35 deletions(-)