@@ -613,6 +613,8 @@ struct kvm_vcpu_xen {
u64 timer_expires; /* In guest epoch */
atomic_t timer_pending;
struct hrtimer timer;
+ int poll_evtchn;
+ struct timer_list poll_timer;
};
struct kvm_vcpu_arch {
@@ -1027,6 +1029,7 @@ struct kvm_xen {
u8 upcall_vector;
struct gfn_to_pfn_cache shinfo_cache;
struct idr evtchn_ports;
+ unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
};
enum kvm_irqchip_mode {
@@ -10,6 +10,7 @@
#include "xen.h"
#include "lapic.h"
#include "hyperv.h"
+#include "lapic.h"
#include <linux/eventfd.h>
#include <linux/kvm_host.h>
@@ -971,9 +972,146 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
}
-static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, int cmd, u64 param, u64 *r)
+static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
+ evtchn_port_t *ports)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
+ unsigned long *pending_bits;
+ unsigned long flags;
+ bool ret = true;
+ int idx, i;
+
+ read_lock_irqsave(&gpc->lock, flags);
+ idx = srcu_read_lock(&kvm->srcu);
+ if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
+ goto out_rcu;
+
+ ret = false;
+ if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+ struct shared_info *shinfo = gpc->khva;
+ pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+ } else {
+ struct compat_shared_info *shinfo = gpc->khva;
+ pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+ }
+
+ for (i = 0; i < nr_ports; i++) {
+ if (test_bit(ports[i], pending_bits)) {
+ ret = true;
+ break;
+ }
+ }
+
+ out_rcu:
+ srcu_read_unlock(&kvm->srcu, idx);
+ read_unlock_irqrestore(&gpc->lock, flags);
+
+ return ret;
+}
+
+static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
+ u64 param, u64 *r)
+{
+ int idx, i;
+ struct sched_poll sched_poll;
+ evtchn_port_t port, *ports;
+ gpa_t gpa;
+
+ if (!longmode || !lapic_in_kernel(vcpu) ||
+ !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
+ return false;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
+ sizeof(sched_poll))) {
+ *r = -EFAULT;
+ return true;
+ }
+
+ if (unlikely(sched_poll.nr_ports > 1)) {
+ /* Xen (unofficially) limits number of pollers to 128 */
+ if (sched_poll.nr_ports > 128) {
+ *r = -EINVAL;
+ return true;
+ }
+
+ ports = kmalloc_array(sched_poll.nr_ports,
+ sizeof(*ports), GFP_KERNEL);
+ if (!ports) {
+ *r = -ENOMEM;
+ return true;
+ }
+ } else
+ ports = &port;
+
+ for (i = 0; i < sched_poll.nr_ports; i++) {
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ gpa = kvm_mmu_gva_to_gpa_system(vcpu,
+ (gva_t)(sched_poll.ports + i),
+ NULL);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (!gpa || kvm_vcpu_read_guest(vcpu, gpa,
+ &ports[i], sizeof(port))) {
+ *r = -EFAULT;
+ goto out;
+ }
+ }
+
+ if (sched_poll.nr_ports == 1)
+ vcpu->arch.xen.poll_evtchn = port;
+ else
+ vcpu->arch.xen.poll_evtchn = -1;
+
+ set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask);
+
+ if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) {
+ vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+
+ if (sched_poll.timeout)
+ mod_timer(&vcpu->arch.xen.poll_timer,
+ jiffies + nsecs_to_jiffies(sched_poll.timeout));
+
+ kvm_vcpu_halt(vcpu);
+
+ if (sched_poll.timeout)
+ del_timer(&vcpu->arch.xen.poll_timer);
+
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ }
+
+ vcpu->arch.xen.poll_evtchn = 0;
+ *r = 0;
+out:
+ /* Really, this is only needed in case of timeout */
+ clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask);
+
+ if (unlikely(sched_poll.nr_ports > 1))
+ kfree(ports);
+ return true;
+}
+
+static void cancel_evtchn_poll(struct timer_list *t)
+{
+ struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer);
+
+ kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
+ int cmd, u64 param, u64 *r)
{
switch (cmd) {
+ case SCHEDOP_poll:
+ if (kvm_xen_schedop_poll(vcpu, longmode, param, r))
+ return true;
+ fallthrough;
case SCHEDOP_yield:
kvm_vcpu_on_spin(vcpu, true);
*r = 0;
@@ -1138,7 +1276,8 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r);
break;
case __HYPERVISOR_sched_op:
- handled = kvm_xen_hcall_sched_op(vcpu, params[0], params[1], &r);
+ handled = kvm_xen_hcall_sched_op(vcpu, longmode, params[0],
+ params[1], &r);
break;
case __HYPERVISOR_vcpu_op:
handled = kvm_xen_hcall_vcpu_op(vcpu, longmode, params[0], params[1],
@@ -1185,6 +1324,17 @@ static inline int max_evtchn_port(struct kvm *kvm)
return COMPAT_EVTCHN_2L_NR_CHANNELS;
}
+static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
+{
+ int poll_evtchn = vcpu->arch.xen.poll_evtchn;
+
+ if ((poll_evtchn == port || poll_evtchn == -1) &&
+ test_and_clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask)) {
+ kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+}
+
/*
* The return value from this function is propagated to kvm_set_irq() API,
* so it returns:
@@ -1252,6 +1402,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
rc = 0; /* It was already raised */
} else if (test_bit(xe->port, mask_bits)) {
rc = -ENOTCONN; /* Masked */
+ kvm_xen_check_poller(vcpu, xe->port);
} else {
rc = 1; /* Delivered to the bitmap in shared_info. */
/* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
@@ -1661,6 +1812,8 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
{
vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
+ vcpu->arch.xen.poll_evtchn = 0;
+ timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
}
void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -1674,6 +1827,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
&vcpu->arch.xen.vcpu_info_cache);
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
&vcpu->arch.xen.vcpu_time_info_cache);
+ del_timer_sync(&vcpu->arch.xen.poll_timer);
}
void kvm_xen_init_vm(struct kvm *kvm)
@@ -233,6 +233,12 @@ int main(int argc, char *argv[])
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
.msr = XEN_HYPERCALL_MSR,
};
+
+ /* Let the kernel know that we *will* use it for sending all
+ * event channels, which lets it intercept SCHEDOP_poll */
+ if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)
+ hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
struct kvm_xen_hvm_attr lm = {