[v3,16/17] KVM: x86/xen: handle PV spinlocks slowpath

Message ID	20220303154127.202856-17-dwmw2@infradead.org (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@kernel.org> From: David Woodhouse <dwmw2@infradead.org> To: kvm@vger.kernel.org, Paolo Bonzini <pbonzini@redhat.com> Cc: Sean Christopherson <seanjc@google.com>, Vitaly Kuznetsov <vkuznets@redhat.com>, Wanpeng Li <wanpengli@tencent.com>, Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>, Joao Martins <joao.m.martins@oracle.com>, Boris Ostrovsky <boris.ostrovsky@oracle.com>, Metin Kaya <metikaya@amazon.co.uk>, Paul Durrant <pdurrant@amazon.co.uk> Subject: [PATCH v3 16/17] KVM: x86/xen: handle PV spinlocks slowpath Date: Thu, 3 Mar 2022 15:41:26 +0000 Message-Id: <20220303154127.202856-17-dwmw2@infradead.org> In-Reply-To: <20220303154127.202856-1-dwmw2@infradead.org> References: <20220303154127.202856-1-dwmw2@infradead.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: David Woodhouse <dwmw2@infradead.org> Precedence: bulk
Series	KVM: Add Xen event channel acceleration \| expand [v3,00/17] KVM: Add Xen event channel acceleration [v3,01/17] KVM: Use enum to track if cached PFN will be used in guest and/or host [v3,02/17] KVM: Remove dirty handling from gfn_to_pfn_cache completely [v3,03/17] KVM: x86/xen: Use gfn_to_pfn_cache for runstate area [v3,04/17] KVM: x86: Use gfn_to_pfn_cache for pv_time [v3,05/17] KVM: x86/xen: Use gfn_to_pfn_cache for vcpu_info [v3,06/17] KVM: x86/xen: Use gfn_to_pfn_cache for vcpu_time_info [v3,07/17] KVM: x86/xen: Make kvm_xen_set_evtchn() reusable from other places [v3,08/17] KVM: x86/xen: Support direct injection of event channel events [v3,09/17] KVM: x86/xen: intercept EVTCHNOP_send from guests [v3,10/17] KVM: x86/xen: handle PV IPI vcpu yield [v3,11/17] KVM: x86/xen: Add KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID [v3,12/17] KVM: x86/xen: handle PV timers oneshot mode [v3,13/17] KVM: x86/xen: Kernel acceleration for XENVER_version [v3,14/17] KVM: x86/xen: Support per-vCPU event channel upcall via local APIC [v3,15/17] KVM: x86/xen: Advertise and document KVM_XEN_HVM_CONFIG_EVTCHN_SEND [v3,16/17] KVM: x86/xen: handle PV spinlocks slowpath [v3,17/17] KVM: x86/xen: Add self tests for KVM_XEN_HVM_CONFIG_EVTCHN_SEND

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 30f6fc549235..c7cf34adf3de 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -615,6 +615,8 @@ struct kvm_vcpu_xen { u64 timer_expires; /* In guest epoch */ atomic_t timer_pending; struct hrtimer timer; + int poll_evtchn; + struct timer_list poll_timer; }; struct kvm_vcpu_arch { @@ -1029,6 +1031,7 @@ struct kvm_xen { u8 upcall_vector; struct gfn_to_pfn_cache shinfo_cache; struct idr evtchn_ports; + unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; }; enum kvm_irqchip_mode { diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 3980cafa0b68..8c85a71aa8ca 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -10,6 +10,7 @@ #include "xen.h" #include "lapic.h" #include "hyperv.h" +#include "lapic.h" #include <linux/eventfd.h> #include <linux/kvm_host.h> @@ -972,9 +973,146 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu) return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result); } -static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, int cmd, u64 param, u64 *r) +static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, + evtchn_port_t *ports) +{ + struct kvm *kvm = vcpu->kvm; + struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; + unsigned long *pending_bits; + unsigned long flags; + bool ret = true; + int idx, i; + + read_lock_irqsave(&gpc->lock, flags); + idx = srcu_read_lock(&kvm->srcu); + if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) + goto out_rcu; + + ret = false; + if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { + struct shared_info *shinfo = gpc->khva; + pending_bits = (unsigned long *)&shinfo->evtchn_pending; + } else { + struct compat_shared_info *shinfo = gpc->khva; + pending_bits = (unsigned long *)&shinfo->evtchn_pending; + } + + for (i = 0; i < nr_ports; i++) { + if (test_bit(ports[i], pending_bits)) { + ret = true; + break; + } + } + + out_rcu: + srcu_read_unlock(&kvm->srcu, idx); + read_unlock_irqrestore(&gpc->lock, flags); + + return ret; +} + +static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, + u64 param, u64 *r) +{ + int idx, i; + struct sched_poll sched_poll; + evtchn_port_t port, *ports; + gpa_t gpa; + + if (!longmode || !lapic_in_kernel(vcpu) || + !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)) + return false; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll, + sizeof(sched_poll))) { + *r = -EFAULT; + return true; + } + + if (unlikely(sched_poll.nr_ports > 1)) { + /* Xen (unofficially) limits number of pollers to 128 */ + if (sched_poll.nr_ports > 128) { + *r = -EINVAL; + return true; + } + + ports = kmalloc_array(sched_poll.nr_ports, + sizeof(*ports), GFP_KERNEL); + if (!ports) { + *r = -ENOMEM; + return true; + } + } else + ports = &port; + + for (i = 0; i < sched_poll.nr_ports; i++) { + idx = srcu_read_lock(&vcpu->kvm->srcu); + gpa = kvm_mmu_gva_to_gpa_system(vcpu, + (gva_t)(sched_poll.ports + i), + NULL); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, + &ports[i], sizeof(port))) { + *r = -EFAULT; + goto out; + } + } + + if (sched_poll.nr_ports == 1) + vcpu->arch.xen.poll_evtchn = port; + else + vcpu->arch.xen.poll_evtchn = -1; + + set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask); + + if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) { + vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + + if (sched_poll.timeout) + mod_timer(&vcpu->arch.xen.poll_timer, + jiffies + nsecs_to_jiffies(sched_poll.timeout)); + + kvm_vcpu_halt(vcpu); + + if (sched_poll.timeout) + del_timer(&vcpu->arch.xen.poll_timer); + + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + kvm_clear_request(KVM_REQ_UNHALT, vcpu); + } + + vcpu->arch.xen.poll_evtchn = 0; + *r = 0; +out: + /* Really, this is only needed in case of timeout */ + clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask); + + if (unlikely(sched_poll.nr_ports > 1)) + kfree(ports); + return true; +} + +static void cancel_evtchn_poll(struct timer_list *t) +{ + struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer); + + kvm_make_request(KVM_REQ_UNBLOCK, vcpu); + kvm_vcpu_kick(vcpu); +} + +static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode, + int cmd, u64 param, u64 *r) { switch (cmd) { + case SCHEDOP_poll: + if (kvm_xen_schedop_poll(vcpu, longmode, param, r)) + return true; + fallthrough; case SCHEDOP_yield: kvm_vcpu_on_spin(vcpu, true); *r = 0; @@ -1139,7 +1277,8 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r); break; case __HYPERVISOR_sched_op: - handled = kvm_xen_hcall_sched_op(vcpu, params[0], params[1], &r); + handled = kvm_xen_hcall_sched_op(vcpu, longmode, params[0], + params[1], &r); break; case __HYPERVISOR_vcpu_op: handled = kvm_xen_hcall_vcpu_op(vcpu, longmode, params[0], params[1], @@ -1186,6 +1325,17 @@ static inline int max_evtchn_port(struct kvm *kvm) return COMPAT_EVTCHN_2L_NR_CHANNELS; } +static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port) +{ + int poll_evtchn = vcpu->arch.xen.poll_evtchn; + + if ((poll_evtchn == port || poll_evtchn == -1) && + test_and_clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask)) { + kvm_make_request(KVM_REQ_UNBLOCK, vcpu); + kvm_vcpu_kick(vcpu); + } +} + /* * The return value from this function is propagated to kvm_set_irq() API, * so it returns: @@ -1253,6 +1403,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) rc = 0; /* It was already raised */ } else if (test_bit(xe->port, mask_bits)) { rc = -ENOTCONN; /* Masked */ + kvm_xen_check_poller(vcpu, xe->port); } else { rc = 1; /* Delivered to the bitmap in shared_info. */ /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */ @@ -1683,6 +1834,8 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r) void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) { vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx; + vcpu->arch.xen.poll_evtchn = 0; + timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); } void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) @@ -1696,6 +1849,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) &vcpu->arch.xen.vcpu_info_cache); kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache); + del_timer_sync(&vcpu->arch.xen.poll_timer); } void kvm_xen_init_vm(struct kvm *kvm) diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 865e17146815..376c611443cd 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -233,6 +233,12 @@ int main(int argc, char *argv[]) .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, .msr = XEN_HYPERCALL_MSR, }; + + /* Let the kernel know that we *will* use it for sending all + * event channels, which lets it intercept SCHEDOP_poll */ + if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) + hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; + vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); struct kvm_xen_hvm_attr lm = {

[v3,16/17] KVM: x86/xen: handle PV spinlocks slowpath

Commit Message

Patch