@@ -31,8 +31,15 @@ struct arch_timer_context {
/* Timer IRQ */
struct kvm_irq_level irq;
- /* Active IRQ state caching */
- bool active_cleared_last;
+ /*
+ * We have multiple paths which can save the timer state from, or
+ * restore it onto, the hardware, so we need some way of keeping
+ * track of where the latest state is.
+ *
+ * loaded == true: State is loaded on the hardware registers.
+ * loaded == false: State is stored in memory.
+ */
+ bool loaded;
/* Virtual offset */
u64 cntvoff;
@@ -78,10 +85,15 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
u64 kvm_phys_timer_read(void);
+void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
void kvm_timer_init_vhe(void);
#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer)
#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer)
+
+void enable_el1_phys_timer_access(void);
+void disable_el1_phys_timer_access(void);
+
#endif
@@ -46,10 +46,9 @@ static const struct kvm_irq_level default_vtimer_irq = {
.level = 1,
};
-void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
-{
- vcpu_vtimer(vcpu)->active_cleared_last = false;
-}
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+ struct arch_timer_context *timer_ctx);
u64 kvm_phys_timer_read(void)
{
@@ -69,17 +68,45 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
cancel_work_sync(work);
}
-static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
/*
- * We disable the timer in the world switch and let it be
- * handled by kvm_timer_sync_hwstate(). Getting a timer
- * interrupt at this point is a sure sign of some major
- * breakage.
+ * When using a userspace irqchip with the architected timers, we must
+ * prevent continuously exiting from the guest. We therefore mask the
+ * physical interrupt by disabling it on the host interrupt controller
+ * while the virtual level is high, so that the guest can make
+ * forward progress. Once we detect the output level being
+ * de-asserted, we unmask the interrupt again so that we exit from the
+ * guest when the timer fires.
*/
- pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
+ if (vtimer->irq.level)
+ disable_percpu_irq(host_vtimer_irq);
+ else
+ enable_percpu_irq(host_vtimer_irq, 0);
+}
+
+static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+{
+ struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+ struct arch_timer_context *vtimer;
+
+ if (!vcpu) {
+ pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
+ return IRQ_NONE;
+ }
+ vtimer = vcpu_vtimer(vcpu);
+
+ if (!vtimer->irq.level) {
+ vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ if (kvm_timer_irq_can_fire(vtimer))
+ kvm_timer_update_irq(vcpu, true, vtimer);
+ }
+
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ kvm_vtimer_update_mask_user(vcpu);
+
return IRQ_HANDLED;
}
@@ -215,7 +242,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
{
int ret;
- timer_ctx->active_cleared_last = false;
timer_ctx->irq.level = new_level;
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
timer_ctx->irq.level);
@@ -271,10 +297,16 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu,
soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx));
}
-static void timer_save_state(struct kvm_vcpu *vcpu)
+static void vtimer_save_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ if (!vtimer->loaded)
+ goto out;
if (timer->enabled) {
vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
@@ -283,6 +315,10 @@ static void timer_save_state(struct kvm_vcpu *vcpu)
/* Disable the virtual timer */
write_sysreg_el0(0, cntv_ctl);
+
+ vtimer->loaded = false;
+out:
+ local_irq_restore(flags);
}
/*
@@ -296,6 +332,8 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ vtimer_save_state(vcpu);
+
/*
* No need to schedule a background timer if any guest timer has
* already expired, because kvm_vcpu_block will return before putting
@@ -318,22 +356,34 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}
-static void timer_restore_state(struct kvm_vcpu *vcpu)
+static void vtimer_restore_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ if (vtimer->loaded)
+ goto out;
if (timer->enabled) {
write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
isb();
write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
}
+
+ vtimer->loaded = true;
+out:
+ local_irq_restore(flags);
}
void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ vtimer_restore_state(vcpu);
+
soft_timer_cancel(&timer->bg_timer, &timer->expired);
}
@@ -352,61 +402,45 @@ static void set_cntvoff(u64 cntvoff)
kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
}
-static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
bool phys_active;
int ret;
- /*
- * If we enter the guest with the virtual input level to the VGIC
- * asserted, then we have already told the VGIC what we need to, and
- * we don't need to exit from the guest until the guest deactivates
- * the already injected interrupt, so therefore we should set the
- * hardware active state to prevent unnecessary exits from the guest.
- *
- * Also, if we enter the guest with the virtual timer interrupt active,
- * then it must be active on the physical distributor, because we set
- * the HW bit and the guest must be able to deactivate the virtual and
- * physical interrupt at the same time.
- *
- * Conversely, if the virtual input level is deasserted and the virtual
- * interrupt is not active, then always clear the hardware active state
- * to ensure that hardware interrupts from the timer triggers a guest
- * exit.
- */
phys_active = vtimer->irq.level ||
- kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-
- /*
- * We want to avoid hitting the (re)distributor as much as
- * possible, as this is a potentially expensive MMIO access
- * (not to mention locks in the irq layer), and a solution for
- * this is to cache the "active" state in memory.
- *
- * Things to consider: we cannot cache an "active set" state,
- * because the HW can change this behind our back (it becomes
- * "clear" in the HW). We must then restrict the caching to
- * the "clear" state.
- *
- * The cache is invalidated on:
- * - vcpu put, indicating that the HW cannot be trusted to be
- * in a sane state on the next vcpu load,
- * - any change in the interrupt state
- *
- * Usage conditions:
- * - cached value is "active clear"
- * - value to be programmed is "active clear"
- */
- if (vtimer->active_cleared_last && !phys_active)
- return;
+ kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
ret = irq_set_irqchip_state(host_vtimer_irq,
IRQCHIP_STATE_ACTIVE,
phys_active);
WARN_ON(ret);
+}
- vtimer->active_cleared_last = !phys_active;
+static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+{
+ kvm_vtimer_update_mask_user(vcpu);
+}
+
+void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+ if (unlikely(!timer->enabled))
+ return;
+
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ kvm_timer_vcpu_load_user(vcpu);
+ else
+ kvm_timer_vcpu_load_vgic(vcpu);
+
+ set_cntvoff(vtimer->cntvoff);
+
+ vtimer_restore_state(vcpu);
+
+ if (has_vhe())
+ disable_el1_phys_timer_access();
}
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -426,23 +460,6 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
ptimer->irq.level != plevel;
}
-static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
- /*
- * To prevent continuously exiting from the guest, we mask the
- * physical interrupt such that the guest can make forward progress.
- * Once we detect the output level being deasserted, we unmask the
- * interrupt again so that we exit from the guest when the timer
- * fires.
- */
- if (vtimer->irq.level)
- disable_percpu_irq(host_vtimer_irq);
- else
- enable_percpu_irq(host_vtimer_irq, 0);
-}
-
/**
* kvm_timer_flush_hwstate - prepare timers before running the vcpu
* @vcpu: The vcpu pointer
@@ -455,23 +472,61 @@ static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
if (unlikely(!timer->enabled))
return;
- kvm_timer_update_state(vcpu);
+ if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+ kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
/* Set the background timer for the physical timer emulation. */
phys_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+}
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
- kvm_timer_flush_hwstate_user(vcpu);
- else
- kvm_timer_flush_hwstate_vgic(vcpu);
+void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- set_cntvoff(vtimer->cntvoff);
- timer_restore_state(vcpu);
+ if (unlikely(!timer->enabled))
+ return;
+
+ if (has_vhe())
+ enable_el1_phys_timer_access();
+
+ vtimer_save_state(vcpu);
+
+ /*
+ * The kernel may decide to run userspace after calling vcpu_put, so
+ * we reset cntvoff to 0 to ensure a consistent read between user
+ * accesses to the virtual counter and kernel access to the physical
+ * counter.
+ */
+ set_cntvoff(0);
+}
+
+static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+ kvm_vtimer_update_mask_user(vcpu);
+ return;
+ }
+
+ /*
+ * If the guest disabled the timer without acking the interrupt, then
+ * we must make sure the physical and virtual active states are in
+ * sync by deactivating the physical interrupt, because otherwise we
+ * wouldn't see the next timer interrupt in the host.
+ */
+ if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
+ int ret;
+ ret = irq_set_irqchip_state(host_vtimer_irq,
+ IRQCHIP_STATE_ACTIVE,
+ false);
+ WARN_ON(ret);
+ }
}
/**
@@ -484,6 +539,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
/*
* This is to cancel the background timer for the physical timer
@@ -491,14 +547,19 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
*/
soft_timer_cancel(&timer->phys_timer, NULL);
- timer_save_state(vcpu);
- set_cntvoff(0);
-
/*
- * The guest could have modified the timer registers or the timer
- * could have expired, update the timer state.
+ * If we entered the guest with the vtimer output asserted, we have
+ * to re-read the timer registers and check whether the guest has
+ * changed them such that the line should be lowered at this point.
*/
- kvm_timer_update_state(vcpu);
+ if (vtimer->irq.level) {
+ vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+ if (!kvm_timer_should_fire(vtimer)) {
+ kvm_timer_update_irq(vcpu, false, vtimer);
+ unmask_vtimer_irq(vcpu);
+ }
+ }
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -354,18 +354,18 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
kvm_arm_set_running_vcpu(vcpu);
-
kvm_vgic_load(vcpu);
+ kvm_timer_vcpu_load(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
vcpu->cpu = -1;
kvm_arm_set_running_vcpu(NULL);
- kvm_timer_vcpu_put(vcpu);
}
static void vcpu_power_off(struct kvm_vcpu *vcpu)
@@ -710,15 +710,26 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_arm_clear_debug(vcpu);
/*
- * We must sync the PMU and timer state before the vgic state so
+ * We must sync the PMU state before the vgic state so
* that the vgic can properly sample the updated state of the
* interrupt line.
*/
kvm_pmu_sync_hwstate(vcpu);
- kvm_timer_sync_hwstate(vcpu);
+ /*
+ * Sync the vgic state before syncing the timer state because
+ * the timer code needs to know if the virtual timer
+ * interrupts are active.
+ */
kvm_vgic_sync_hwstate(vcpu);
+ /*
+ * Sync the timer hardware state before enabling interrupts as
+ * we don't want vtimer interrupts to race with syncing the
+ * timer virtual interrupt state.
+ */
+ kvm_timer_sync_hwstate(vcpu);
+
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still