diff mbox series

[v3,09/11] x86/vpt: switch interrupt injection model

Message ID 20210331103303.79705-10-roger.pau@citrix.com (mailing list archive)
State Superseded
Headers show
Series x86/intr: introduce EOI callbacks and fix vPT | expand

Commit Message

Roger Pau Monne March 31, 2021, 10:33 a.m. UTC
Currently vPT relies on timers being assigned to a vCPU and performing
checks on every return to HVM guest in order to check if an interrupt
from a vPT timer assigned to the vCPU is currently being injected.

This model doesn't work properly since the interrupt destination vCPU
of a vPT timer can be different from the vCPU where the timer is
currently assigned, in which case the timer would get stuck because it
never sees the interrupt as being injected.

Knowing when a vPT interrupt is injected is relevant for the guest
timer modes where missed vPT interrupts are not discarded and instead
are accumulated and injected when possible.

This change aims to modify the logic described above, so that vPT
doesn't need to check on every return to HVM guest if a vPT interrupt
is being injected. In order to achieve this the vPT code is modified
to make use of the new EOI callbacks, so that virtual timers can
detect when a interrupt has been serviced by the guest by waiting for
the EOI callback to execute.

This model also simplifies some of the logic, as when executing the
timer EOI callback Xen can try to inject another interrupt if the
timer has interrupts pending for delivery.

Note that timers are still bound to a vCPU for the time being, this
relation however doesn't limit the interrupt destination anymore, and
will be removed by further patches.

This model has been tested with Windows 7 guests without showing any
timer delay, even when the guest was limited to have very little CPU
capacity and pending virtual timer interrupts accumulate.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v2:
 - Avoid and explicit != NULL check.
 - Use a switch in inject_interrupt to evaluate the timer mode.
 - Print the pt->source field on error in create_periodic_time.

Changes since v1:
 - New in this version.
---
 xen/arch/x86/hvm/svm/intr.c   |   3 -
 xen/arch/x86/hvm/vmx/intr.c   |  59 ------
 xen/arch/x86/hvm/vpt.c        | 334 ++++++++++++++--------------------
 xen/include/asm-x86/hvm/vpt.h |   5 +-
 4 files changed, 143 insertions(+), 258 deletions(-)

Comments

Jan Beulich April 14, 2021, 10:28 a.m. UTC | #1
On 31.03.2021 12:33, Roger Pau Monne wrote:
> ---
>  xen/arch/x86/hvm/svm/intr.c   |   3 -
>  xen/arch/x86/hvm/vmx/intr.c   |  59 ------
>  xen/arch/x86/hvm/vpt.c        | 334 ++++++++++++++--------------------
>  xen/include/asm-x86/hvm/vpt.h |   5 +-
>  4 files changed, 143 insertions(+), 258 deletions(-)

Nice.

> @@ -285,189 +238,144 @@ static void pt_irq_fired(struct vcpu *v, struct periodic_time *pt)
>              list_del(&pt->list);
>          pt->on_list = false;
>          pt->pending_intr_nr = 0;
> +
> +        return;
>      }
> -    else if ( mode_is(v->domain, one_missed_tick_pending) ||
> -              mode_is(v->domain, no_missed_ticks_pending) )
> +
> +    if ( mode_is(v->domain, one_missed_tick_pending) ||
> +         mode_is(v->domain, no_missed_ticks_pending) )
>      {
> -        pt->last_plt_gtime = hvm_get_guest_time(v);
>          pt_process_missed_ticks(pt);
>          pt->pending_intr_nr = 0; /* 'collapse' all missed ticks */
> +    }
> +    else if ( !pt->pending_intr_nr )
> +        pt_process_missed_ticks(pt);

Did you lose a -- here? I.e. does the condition mean to match ...

> +    if ( !pt->pending_intr_nr )
>          set_timer(&pt->timer, pt->scheduled);
> +}
> +
> +static void pt_timer_fn(void *data)
> +{
> +    struct periodic_time *pt = data;
> +    struct vcpu *v;
> +    time_cb *cb = NULL;
> +    void *cb_priv;
> +    unsigned int irq;
> +
> +    pt_lock(pt);
> +
> +    v = pt->vcpu;
> +    irq = pt->irq;
> +
> +    if ( inject_interrupt(pt) )
> +    {
> +        pt->scheduled += pt->period;
> +        pt->do_not_freeze = 0;
> +        cb = pt->cb;
> +        cb_priv = pt->priv;
>      }
>      else
>      {
> -        pt->last_plt_gtime += pt->period;
> -        if ( --pt->pending_intr_nr == 0 )

... this original code? Otherwise I can't see why the condition
guards a pt_process_missed_ticks() invocation.

> @@ -617,20 +556,29 @@ void pt_adjust_global_vcpu_target(struct vcpu *v)
>      write_unlock(&pl_time->vhpet.lock);
>  }
>  
> -
>  static void pt_resume(struct periodic_time *pt)
>  {
> +    struct vcpu *v;
> +    time_cb *cb = NULL;
> +    void *cb_priv;
> +
>      if ( pt->vcpu == NULL )
>          return;
>  
>      pt_lock(pt);
> -    if ( pt->pending_intr_nr && !pt->on_list )
> +    if ( pt->pending_intr_nr && !pt->on_list && inject_interrupt(pt) )
>      {
> +        pt->pending_intr_nr--;
> +        cb = pt->cb;
> +        cb_priv = pt->priv;
> +        v = pt->vcpu;
>          pt->on_list = 1;
>          list_add(&pt->list, &pt->vcpu->arch.hvm.tm_list);
> -        vcpu_kick(pt->vcpu);
>      }
>      pt_unlock(pt);
> +
> +    if ( cb )
> +        cb(v, cb_priv);
>  }

I'm afraid until we raise our supported gcc versions baseline, v and
cb_priv will need an initializer at the top of the function just like
cb.

Jan
Roger Pau Monne April 14, 2021, 1:37 p.m. UTC | #2
On Wed, Apr 14, 2021 at 12:28:43PM +0200, Jan Beulich wrote:
> On 31.03.2021 12:33, Roger Pau Monne wrote:
> > ---
> >  xen/arch/x86/hvm/svm/intr.c   |   3 -
> >  xen/arch/x86/hvm/vmx/intr.c   |  59 ------
> >  xen/arch/x86/hvm/vpt.c        | 334 ++++++++++++++--------------------
> >  xen/include/asm-x86/hvm/vpt.h |   5 +-
> >  4 files changed, 143 insertions(+), 258 deletions(-)
> 
> Nice.
> 
> > @@ -285,189 +238,144 @@ static void pt_irq_fired(struct vcpu *v, struct periodic_time *pt)
> >              list_del(&pt->list);
> >          pt->on_list = false;
> >          pt->pending_intr_nr = 0;
> > +
> > +        return;
> >      }
> > -    else if ( mode_is(v->domain, one_missed_tick_pending) ||
> > -              mode_is(v->domain, no_missed_ticks_pending) )
> > +
> > +    if ( mode_is(v->domain, one_missed_tick_pending) ||
> > +         mode_is(v->domain, no_missed_ticks_pending) )
> >      {
> > -        pt->last_plt_gtime = hvm_get_guest_time(v);
> >          pt_process_missed_ticks(pt);
> >          pt->pending_intr_nr = 0; /* 'collapse' all missed ticks */
> > +    }
> > +    else if ( !pt->pending_intr_nr )
> > +        pt_process_missed_ticks(pt);
> 
> Did you lose a -- here? I.e. does the condition mean to match ...
> 
> > +    if ( !pt->pending_intr_nr )
> >          set_timer(&pt->timer, pt->scheduled);
> > +}
> > +
> > +static void pt_timer_fn(void *data)
> > +{
> > +    struct periodic_time *pt = data;
> > +    struct vcpu *v;
> > +    time_cb *cb = NULL;
> > +    void *cb_priv;
> > +    unsigned int irq;
> > +
> > +    pt_lock(pt);
> > +
> > +    v = pt->vcpu;
> > +    irq = pt->irq;
> > +
> > +    if ( inject_interrupt(pt) )
> > +    {
> > +        pt->scheduled += pt->period;
> > +        pt->do_not_freeze = 0;
> > +        cb = pt->cb;
> > +        cb_priv = pt->priv;
> >      }
> >      else
> >      {
> > -        pt->last_plt_gtime += pt->period;
> > -        if ( --pt->pending_intr_nr == 0 )
> 
> ... this original code? Otherwise I can't see why the condition
> guards a pt_process_missed_ticks() invocation.

I think the logic here changed enough to not match anymore. Certainly
pending_intr_nr shouldn't be decreased there, as pt_irq_fired is
invoked after an EOI in this patch, instead of being invoked when a
vpt related interrupt was injected. I think I should better rename
pt_irq_fired to pt_irq_eoi and that would make it clearer.

FWIW, decreasing pending_intr_nr should only be done after an
inject_interrupt call.

> > @@ -617,20 +556,29 @@ void pt_adjust_global_vcpu_target(struct vcpu *v)
> >      write_unlock(&pl_time->vhpet.lock);
> >  }
> >  
> > -
> >  static void pt_resume(struct periodic_time *pt)
> >  {
> > +    struct vcpu *v;
> > +    time_cb *cb = NULL;
> > +    void *cb_priv;
> > +
> >      if ( pt->vcpu == NULL )
> >          return;
> >  
> >      pt_lock(pt);
> > -    if ( pt->pending_intr_nr && !pt->on_list )
> > +    if ( pt->pending_intr_nr && !pt->on_list && inject_interrupt(pt) )
> >      {
> > +        pt->pending_intr_nr--;
> > +        cb = pt->cb;
> > +        cb_priv = pt->priv;
> > +        v = pt->vcpu;
> >          pt->on_list = 1;
> >          list_add(&pt->list, &pt->vcpu->arch.hvm.tm_list);
> > -        vcpu_kick(pt->vcpu);
> >      }
> >      pt_unlock(pt);
> > +
> > +    if ( cb )
> > +        cb(v, cb_priv);
> >  }
> 
> I'm afraid until we raise our supported gcc versions baseline, v and
> cb_priv will need an initializer at the top of the function just like
> cb.

Will add such initializations.

Thanks, Roger.
Jan Beulich April 14, 2021, 2:05 p.m. UTC | #3
On 14.04.2021 15:37, Roger Pau Monné wrote:
> On Wed, Apr 14, 2021 at 12:28:43PM +0200, Jan Beulich wrote:
>> On 31.03.2021 12:33, Roger Pau Monne wrote:
>>> ---
>>>  xen/arch/x86/hvm/svm/intr.c   |   3 -
>>>  xen/arch/x86/hvm/vmx/intr.c   |  59 ------
>>>  xen/arch/x86/hvm/vpt.c        | 334 ++++++++++++++--------------------
>>>  xen/include/asm-x86/hvm/vpt.h |   5 +-
>>>  4 files changed, 143 insertions(+), 258 deletions(-)
>>
>> Nice.
>>
>>> @@ -285,189 +238,144 @@ static void pt_irq_fired(struct vcpu *v, struct periodic_time *pt)
>>>              list_del(&pt->list);
>>>          pt->on_list = false;
>>>          pt->pending_intr_nr = 0;
>>> +
>>> +        return;
>>>      }
>>> -    else if ( mode_is(v->domain, one_missed_tick_pending) ||
>>> -              mode_is(v->domain, no_missed_ticks_pending) )
>>> +
>>> +    if ( mode_is(v->domain, one_missed_tick_pending) ||
>>> +         mode_is(v->domain, no_missed_ticks_pending) )
>>>      {
>>> -        pt->last_plt_gtime = hvm_get_guest_time(v);
>>>          pt_process_missed_ticks(pt);
>>>          pt->pending_intr_nr = 0; /* 'collapse' all missed ticks */
>>> +    }
>>> +    else if ( !pt->pending_intr_nr )
>>> +        pt_process_missed_ticks(pt);
>>
>> Did you lose a -- here? I.e. does the condition mean to match ...
>>
>>> +    if ( !pt->pending_intr_nr )
>>>          set_timer(&pt->timer, pt->scheduled);
>>> +}
>>> +
>>> +static void pt_timer_fn(void *data)
>>> +{
>>> +    struct periodic_time *pt = data;
>>> +    struct vcpu *v;
>>> +    time_cb *cb = NULL;
>>> +    void *cb_priv;
>>> +    unsigned int irq;
>>> +
>>> +    pt_lock(pt);
>>> +
>>> +    v = pt->vcpu;
>>> +    irq = pt->irq;
>>> +
>>> +    if ( inject_interrupt(pt) )
>>> +    {
>>> +        pt->scheduled += pt->period;
>>> +        pt->do_not_freeze = 0;
>>> +        cb = pt->cb;
>>> +        cb_priv = pt->priv;
>>>      }
>>>      else
>>>      {
>>> -        pt->last_plt_gtime += pt->period;
>>> -        if ( --pt->pending_intr_nr == 0 )
>>
>> ... this original code? Otherwise I can't see why the condition
>> guards a pt_process_missed_ticks() invocation.
> 
> I think the logic here changed enough to not match anymore. Certainly
> pending_intr_nr shouldn't be decreased there, as pt_irq_fired is
> invoked after an EOI in this patch, instead of being invoked when a
> vpt related interrupt was injected. I think I should better rename
> pt_irq_fired to pt_irq_eoi and that would make it clearer.

But pt_process_missed_ticks() should be called only when a tick was
missed, shouldn't it? Or actually, looking at the function, I guess
I'm confused. Does your patch change the meaning of the field?

> FWIW, decreasing pending_intr_nr should only be done after an
> inject_interrupt call.

Then this line (which you leave in place)

         pt->pending_intr_nr = 0; /* 'collapse' all missed ticks */

is contradicting the (new) model.

Jan
Roger Pau Monne April 14, 2021, 2:20 p.m. UTC | #4
On Wed, Apr 14, 2021 at 04:05:20PM +0200, Jan Beulich wrote:
> On 14.04.2021 15:37, Roger Pau Monné wrote:
> > On Wed, Apr 14, 2021 at 12:28:43PM +0200, Jan Beulich wrote:
> >> On 31.03.2021 12:33, Roger Pau Monne wrote:
> >>> ---
> >>>  xen/arch/x86/hvm/svm/intr.c   |   3 -
> >>>  xen/arch/x86/hvm/vmx/intr.c   |  59 ------
> >>>  xen/arch/x86/hvm/vpt.c        | 334 ++++++++++++++--------------------
> >>>  xen/include/asm-x86/hvm/vpt.h |   5 +-
> >>>  4 files changed, 143 insertions(+), 258 deletions(-)
> >>
> >> Nice.
> >>
> >>> @@ -285,189 +238,144 @@ static void pt_irq_fired(struct vcpu *v, struct periodic_time *pt)
> >>>              list_del(&pt->list);
> >>>          pt->on_list = false;
> >>>          pt->pending_intr_nr = 0;
> >>> +
> >>> +        return;
> >>>      }
> >>> -    else if ( mode_is(v->domain, one_missed_tick_pending) ||
> >>> -              mode_is(v->domain, no_missed_ticks_pending) )
> >>> +
> >>> +    if ( mode_is(v->domain, one_missed_tick_pending) ||
> >>> +         mode_is(v->domain, no_missed_ticks_pending) )
> >>>      {
> >>> -        pt->last_plt_gtime = hvm_get_guest_time(v);
> >>>          pt_process_missed_ticks(pt);
> >>>          pt->pending_intr_nr = 0; /* 'collapse' all missed ticks */
> >>> +    }
> >>> +    else if ( !pt->pending_intr_nr )
> >>> +        pt_process_missed_ticks(pt);
> >>
> >> Did you lose a -- here? I.e. does the condition mean to match ...
> >>
> >>> +    if ( !pt->pending_intr_nr )
> >>>          set_timer(&pt->timer, pt->scheduled);
> >>> +}
> >>> +
> >>> +static void pt_timer_fn(void *data)
> >>> +{
> >>> +    struct periodic_time *pt = data;
> >>> +    struct vcpu *v;
> >>> +    time_cb *cb = NULL;
> >>> +    void *cb_priv;
> >>> +    unsigned int irq;
> >>> +
> >>> +    pt_lock(pt);
> >>> +
> >>> +    v = pt->vcpu;
> >>> +    irq = pt->irq;
> >>> +
> >>> +    if ( inject_interrupt(pt) )
> >>> +    {
> >>> +        pt->scheduled += pt->period;
> >>> +        pt->do_not_freeze = 0;
> >>> +        cb = pt->cb;
> >>> +        cb_priv = pt->priv;
> >>>      }
> >>>      else
> >>>      {
> >>> -        pt->last_plt_gtime += pt->period;
> >>> -        if ( --pt->pending_intr_nr == 0 )
> >>
> >> ... this original code? Otherwise I can't see why the condition
> >> guards a pt_process_missed_ticks() invocation.
> > 
> > I think the logic here changed enough to not match anymore. Certainly
> > pending_intr_nr shouldn't be decreased there, as pt_irq_fired is
> > invoked after an EOI in this patch, instead of being invoked when a
> > vpt related interrupt was injected. I think I should better rename
> > pt_irq_fired to pt_irq_eoi and that would make it clearer.
> 
> But pt_process_missed_ticks() should be called only when a tick was
> missed, shouldn't it?

No, I think the purpose of the function is to update the
pending_intr_nr field, ie: calculate if and how many ticks have been
missed.

It's fine for pt_process_missed_ticks to return without having changed
pending_intr_nr at all if no ticks have been missed.

> Or actually, looking at the function, I guess
> I'm confused. Does your patch change the meaning of the field?

Not really, I think pt_process_missed_ticks has always had this logic.
The pending_intr_nr filed should still have the same logic, account
for the amount of missed ticks up to the value in the scheduled field.

> > FWIW, decreasing pending_intr_nr should only be done after an
> > inject_interrupt call.
> 
> Then this line (which you leave in place)
> 
>          pt->pending_intr_nr = 0; /* 'collapse' all missed ticks */
> 
> is contradicting the (new) model.

Oh, right, that's mode specific. no_missed_ticks_pending will just
drop any interrupts that haven't been injected when they should have
been. I had the 'account missed ticks' mode in mind when I wrote that.

I now have pt_irq_fired as:

static void irq_eoi(struct periodic_time *pt)
{
    if ( pt->one_shot )
    {
        pt->pending_intr_nr = 0;
        return;
    }

    pt_process_missed_ticks(pt);
    /* 'collapse' missed ticks according to the selected mode. */
    switch ( pt->vcpu->domain->arch.hvm.params[HVM_PARAM_TIMER_MODE] )
    {
    case HVMPTM_one_missed_tick_pending:
        pt->pending_intr_nr = min(pt->pending_intr_nr, 1u);
        break;

    case HVMPTM_no_missed_ticks_pending:
        pt->pending_intr_nr = 0;
        break;
    }

    if ( !pt->pending_intr_nr )
    {
        /* Make sure timer follows vCPU. */
        migrate_timer(&pt->timer, current->processor);
        set_timer(&pt->timer, pt->scheduled);
    }
}

But I think it's best if I post it as a new version, so you can see
the context.

Thanks, Roger.
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c
index 7f815d23078..2ee2332253b 100644
--- a/xen/arch/x86/hvm/svm/intr.c
+++ b/xen/arch/x86/hvm/svm/intr.c
@@ -146,8 +146,6 @@  void svm_intr_assist(void)
         return;
 
     /* Crank the handle on interrupt state. */
-    pt_update_irq(v);
-
     do {
         intack = hvm_vcpu_has_pending_irq(v);
         if ( likely(intack.source == hvm_intsrc_none) )
@@ -219,7 +217,6 @@  void svm_intr_assist(void)
     {
         HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
         svm_inject_extint(v, intack.vector);
-        pt_intr_post(v, intack);
     }
 
     /* Is there another IRQ to queue up behind this one? */
diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c
index 80bfbb47878..3fcc7073db2 100644
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -203,7 +203,6 @@  static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
             {
                 /* for now, duplicate the ack path in vmx_intr_assist */
                 hvm_vcpu_ack_pending_irq(v, intack);
-                pt_intr_post(v, intack);
 
                 intack = hvm_vcpu_has_pending_irq(v);
                 if ( unlikely(intack.source != hvm_intsrc_none) )
@@ -242,7 +241,6 @@  void vmx_intr_assist(void)
     struct vcpu *v = current;
     unsigned int tpr_threshold = 0;
     enum hvm_intblk intblk;
-    int pt_vector;
 
     /* Block event injection when single step with MTF. */
     if ( unlikely(v->arch.hvm.single_step) )
@@ -263,8 +261,6 @@  void vmx_intr_assist(void)
 #endif
 
     /* Crank the handle on interrupt state. */
-    pt_vector = pt_update_irq(v);
-
     do {
         unsigned long intr_info;
 
@@ -337,58 +333,6 @@  void vmx_intr_assist(void)
     {
         unsigned long status;
 
-       /*
-        * intack.vector is the highest priority vector. So we set eoi_exit_bitmap
-        * for intack.vector - give a chance to post periodic time interrupts when
-        * periodic time interrupts become the highest one
-        */
-        if ( pt_vector != -1 )
-        {
-#ifndef NDEBUG
-            /*
-             * We assert that intack.vector is the highest priority vector for
-             * only an interrupt from vlapic can reach this point and the
-             * highest vector is chosen in hvm_vcpu_has_pending_irq().
-             * But, in fact, the assertion failed sometimes. It is suspected
-             * that PIR is not synced to vIRR which makes pt_vector is left in
-             * PIR. In order to verify this suspicion, dump some information
-             * when the assertion fails.
-             */
-            if ( unlikely(intack.vector < pt_vector) )
-            {
-                const struct vlapic *vlapic;
-                const struct pi_desc *pi_desc;
-                const uint32_t *word;
-                unsigned int i;
-
-                printk(XENLOG_ERR "%pv: intack: %u:%02x pt: %02x\n",
-                       current, intack.source, intack.vector, pt_vector);
-
-                vlapic = vcpu_vlapic(v);
-                if ( vlapic && vlapic->regs )
-                {
-                    word = (const void *)&vlapic->regs->data[APIC_IRR];
-                    printk(XENLOG_ERR "vIRR:");
-                    for ( i = X86_NR_VECTORS / 32; i-- ; )
-                        printk(" %08x", word[i*4]);
-                    printk("\n");
-                }
-
-                pi_desc = &v->arch.hvm.vmx.pi_desc;
-                if ( pi_desc )
-                {
-                    word = (const void *)&pi_desc->pir;
-                    printk(XENLOG_ERR " PIR:");
-                    for ( i = X86_NR_VECTORS / 32; i-- ; )
-                        printk(" %08x", word[i]);
-                    printk("\n");
-                }
-            }
-#endif
-            ASSERT(intack.vector >= pt_vector);
-            vmx_set_eoi_exit_bitmap(v, intack.vector);
-        }
-
         /* we need update the RVI field */
         __vmread(GUEST_INTR_STATUS, &status);
         status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
@@ -397,14 +341,11 @@  void vmx_intr_assist(void)
         __vmwrite(GUEST_INTR_STATUS, status);
 
         vmx_sync_exit_bitmap(v);
-
-        pt_intr_post(v, intack);
     }
     else
     {
         HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
         vmx_inject_extint(intack.vector, intack.source);
-        pt_intr_post(v, intack);
     }
 
     /* Is there another IRQ to queue up behind this one? */
diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
index 4c2afe2e915..f951cd95bcf 100644
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -27,6 +27,8 @@ 
 #define mode_is(d, name) \
     ((d)->arch.hvm.params[HVM_PARAM_TIMER_MODE] == HVMPTM_##name)
 
+static bool inject_interrupt(struct periodic_time *pt);
+
 void hvm_init_guest_time(struct domain *d)
 {
     struct pl_time *pl = d->arch.hvm.pl_time;
@@ -76,35 +78,6 @@  void hvm_set_guest_time(struct vcpu *v, u64 guest_time)
     }
 }
 
-static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src)
-{
-    struct vcpu *v = pt->vcpu;
-    unsigned int gsi, isa_irq;
-    int vector;
-
-    if ( pt->source == PTSRC_lapic )
-        return pt->irq;
-
-    isa_irq = pt->irq;
-
-    if ( src == hvm_intsrc_pic )
-        return (v->domain->arch.hvm.vpic[isa_irq >> 3].irq_base
-                + (isa_irq & 7));
-
-    ASSERT(src == hvm_intsrc_lapic);
-    gsi = pt->source == PTSRC_isa ? hvm_isa_irq_to_gsi(isa_irq) : pt->irq;
-    vector = vioapic_get_vector(v->domain, gsi);
-    if ( vector < 0 )
-    {
-        dprintk(XENLOG_WARNING, "d%u: invalid GSI (%u) for platform timer\n",
-                v->domain->domain_id, gsi);
-        domain_crash(v->domain);
-        return -1;
-    }
-
-    return vector;
-}
-
 static int pt_irq_masked(struct periodic_time *pt)
 {
     struct vcpu *v = pt->vcpu;
@@ -247,34 +220,14 @@  void pt_restore_timer(struct vcpu *v)
     pt_vcpu_lock(v);
 
     list_for_each_entry ( pt, head, list )
-    {
         if ( pt->pending_intr_nr == 0 )
-        {
-            pt_process_missed_ticks(pt);
             set_timer(&pt->timer, pt->scheduled);
-        }
-    }
 
     pt_thaw_time(v);
 
     pt_vcpu_unlock(v);
 }
 
-static void pt_timer_fn(void *data)
-{
-    struct periodic_time *pt = data;
-
-    pt_lock(pt);
-
-    pt->pending_intr_nr++;
-    pt->scheduled += pt->period;
-    pt->do_not_freeze = 0;
-
-    vcpu_kick(pt->vcpu);
-
-    pt_unlock(pt);
-}
-
 static void pt_irq_fired(struct vcpu *v, struct periodic_time *pt)
 {
     pt->irq_issued = false;
@@ -285,189 +238,144 @@  static void pt_irq_fired(struct vcpu *v, struct periodic_time *pt)
             list_del(&pt->list);
         pt->on_list = false;
         pt->pending_intr_nr = 0;
+
+        return;
     }
-    else if ( mode_is(v->domain, one_missed_tick_pending) ||
-              mode_is(v->domain, no_missed_ticks_pending) )
+
+    if ( mode_is(v->domain, one_missed_tick_pending) ||
+         mode_is(v->domain, no_missed_ticks_pending) )
     {
-        pt->last_plt_gtime = hvm_get_guest_time(v);
         pt_process_missed_ticks(pt);
         pt->pending_intr_nr = 0; /* 'collapse' all missed ticks */
+    }
+    else if ( !pt->pending_intr_nr )
+        pt_process_missed_ticks(pt);
+
+    if ( !pt->pending_intr_nr )
         set_timer(&pt->timer, pt->scheduled);
+}
+
+static void pt_timer_fn(void *data)
+{
+    struct periodic_time *pt = data;
+    struct vcpu *v;
+    time_cb *cb = NULL;
+    void *cb_priv;
+    unsigned int irq;
+
+    pt_lock(pt);
+
+    v = pt->vcpu;
+    irq = pt->irq;
+
+    if ( inject_interrupt(pt) )
+    {
+        pt->scheduled += pt->period;
+        pt->do_not_freeze = 0;
+        cb = pt->cb;
+        cb_priv = pt->priv;
     }
     else
     {
-        pt->last_plt_gtime += pt->period;
-        if ( --pt->pending_intr_nr == 0 )
-        {
-            pt_process_missed_ticks(pt);
-            if ( pt->pending_intr_nr == 0 )
-                set_timer(&pt->timer, pt->scheduled);
-        }
+        /* Masked. */
+        if ( pt->on_list )
+            list_del(&pt->list);
+        pt->on_list = false;
+        pt->pending_intr_nr++;
     }
 
-    if ( mode_is(v->domain, delay_for_missed_ticks) &&
-         (hvm_get_guest_time(v) < pt->last_plt_gtime) )
-        hvm_set_guest_time(v, pt->last_plt_gtime);
+    pt_unlock(pt);
+
+    if ( cb )
+        cb(v, cb_priv);
 }
 
-int pt_update_irq(struct vcpu *v)
+/*
+ * The same callback is shared between LAPIC and PIC/IO-APIC based timers, as
+ * we ignore the first parameter that's different between them.
+ */
+static void eoi_callback(unsigned int unused, void *data)
 {
-    struct list_head *head = &v->arch.hvm.tm_list;
-    struct periodic_time *pt, *temp, *earliest_pt;
-    uint64_t max_lag;
-    int irq, pt_vector = -1;
-    bool level;
+    struct periodic_time *pt = data;
+    struct vcpu *v;
+    time_cb *cb = NULL;
+    void *cb_priv;
 
-    pt_vcpu_lock(v);
+    pt_lock(pt);
 
-    earliest_pt = NULL;
-    max_lag = -1ULL;
-    list_for_each_entry_safe ( pt, temp, head, list )
+    pt_irq_fired(pt->vcpu, pt);
+    if ( pt->pending_intr_nr )
     {
-        if ( pt->pending_intr_nr )
+        if ( inject_interrupt(pt) )
+        {
+            pt->pending_intr_nr--;
+            cb = pt->cb;
+            cb_priv = pt->priv;
+            v = pt->vcpu;
+        }
+        else
         {
-            /* RTC code takes care of disabling the timer itself. */
-            if ( (pt->irq != RTC_IRQ || !pt->priv) && pt_irq_masked(pt) &&
-                 /* Level interrupts should be asserted even if masked. */
-                 !pt->level )
-            {
-                /* suspend timer emulation */
+            /* Masked. */
+            if ( pt->on_list )
                 list_del(&pt->list);
-                pt->on_list = 0;
-            }
-            else
-            {
-                if ( (pt->last_plt_gtime + pt->period) < max_lag )
-                {
-                    max_lag = pt->last_plt_gtime + pt->period;
-                    earliest_pt = pt;
-                }
-            }
+            pt->on_list = false;
         }
     }
 
-    if ( earliest_pt == NULL )
-    {
-        pt_vcpu_unlock(v);
-        return -1;
-    }
+    pt_unlock(pt);
 
-    earliest_pt->irq_issued = 1;
-    irq = earliest_pt->irq;
-    level = earliest_pt->level;
+    if ( cb )
+        cb(v, cb_priv);
+}
 
-    pt_vcpu_unlock(v);
+static bool inject_interrupt(struct periodic_time *pt)
+{
+    struct vcpu *v = pt->vcpu;
+    struct domain *d = v->domain;
+    unsigned int irq = pt->irq;
 
-    switch ( earliest_pt->source )
+    if ( pt_irq_masked(pt) )
+        return false;
+
+    switch ( pt->source )
     {
     case PTSRC_lapic:
-        /*
-         * If periodic timer interrupt is handled by lapic, its vector in
-         * IRR is returned and used to set eoi_exit_bitmap for virtual
-         * interrupt delivery case. Otherwise return -1 to do nothing.
-         */
-        vlapic_set_irq(vcpu_vlapic(v), irq, 0);
-        pt_vector = irq;
+        vlapic_set_irq_callback(vcpu_vlapic(v), pt->irq, 0, eoi_callback, pt);
         break;
 
     case PTSRC_isa:
-        hvm_isa_irq_deassert(v->domain, irq);
+        hvm_isa_irq_deassert(d, irq);
         if ( platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) &&
-             v->domain->arch.hvm.vpic[irq >> 3].int_output )
-            hvm_isa_irq_assert(v->domain, irq, NULL);
+             d->arch.hvm.vpic[irq >> 3].int_output )
+            hvm_isa_irq_assert(d, irq, NULL);
         else
-        {
-            pt_vector = hvm_isa_irq_assert(v->domain, irq, vioapic_get_vector);
-            /*
-             * hvm_isa_irq_assert may not set the corresponding bit in vIRR
-             * when mask field of IOAPIC RTE is set. Check it again.
-             */
-            if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
-                pt_vector = -1;
-        }
+            hvm_isa_irq_assert(d, irq, vioapic_get_vector);
         break;
 
     case PTSRC_ioapic:
-        pt_vector = hvm_ioapic_assert(v->domain, irq, level);
-        if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
-        {
-            pt_vector = -1;
-            if ( level )
-            {
-                /*
-                 * Level interrupts are always asserted because the pin assert
-                 * count is incremented regardless of whether the pin is masked
-                 * or the vector latched in IRR, so also execute the callback
-                 * associated with the timer.
-                 */
-                time_cb *cb = NULL;
-                void *cb_priv = NULL;
-
-                pt_vcpu_lock(v);
-                /* Make sure the timer is still on the list. */
-                list_for_each_entry ( pt, &v->arch.hvm.tm_list, list )
-                    if ( pt == earliest_pt )
-                    {
-                        pt_irq_fired(v, pt);
-                        cb = pt->cb;
-                        cb_priv = pt->priv;
-                        break;
-                    }
-                pt_vcpu_unlock(v);
-
-                if ( cb != NULL )
-                    cb(v, cb_priv);
-            }
-        }
+        hvm_ioapic_assert(d, irq, pt->level);
         break;
     }
 
-    return pt_vector;
-}
-
-static struct periodic_time *is_pt_irq(
-    struct vcpu *v, struct hvm_intack intack)
-{
-    struct list_head *head = &v->arch.hvm.tm_list;
-    struct periodic_time *pt;
-
-    list_for_each_entry ( pt, head, list )
+    switch ( d->arch.hvm.params[HVM_PARAM_TIMER_MODE] )
     {
-        if ( pt->pending_intr_nr && pt->irq_issued &&
-             (intack.vector == pt_irq_vector(pt, intack.source)) )
-            return pt;
-    }
-
-    return NULL;
-}
-
-void pt_intr_post(struct vcpu *v, struct hvm_intack intack)
-{
-    struct periodic_time *pt;
-    time_cb *cb;
-    void *cb_priv;
-
-    if ( intack.source == hvm_intsrc_vector )
-        return;
+    case HVMPTM_one_missed_tick_pending:
+    case HVMPTM_no_missed_ticks_pending:
+        pt->last_plt_gtime = hvm_get_guest_time(v);
+        break;
 
-    pt_vcpu_lock(v);
+    case HVMPTM_delay_for_missed_ticks:
+        pt->last_plt_gtime += pt->period;
+        if ( hvm_get_guest_time(v) < pt->last_plt_gtime )
+            hvm_set_guest_time(v, pt->last_plt_gtime);
+        break;
 
-    pt = is_pt_irq(v, intack);
-    if ( pt == NULL )
-    {
-        pt_vcpu_unlock(v);
-        return;
+    default:
+        pt->last_plt_gtime += pt->period;
+        break;
     }
 
-    pt_irq_fired(v, pt);
-
-    cb = pt->cb;
-    cb_priv = pt->priv;
-
-    pt_vcpu_unlock(v);
-
-    if ( cb != NULL )
-        cb(v, cb_priv);
+    return true;
 }
 
 void pt_migrate(struct vcpu *v)
@@ -543,6 +451,24 @@  void create_periodic_time(
     pt->cb = cb;
     pt->priv = data;
 
+    switch ( pt->source )
+    {
+        int rc;
+
+    case PTSRC_isa:
+        irq = hvm_isa_irq_to_gsi(irq);
+        /* fallthrough */
+    case PTSRC_ioapic:
+        pt->eoi_cb.callback = eoi_callback;
+        pt->eoi_cb.data = pt;
+        rc = hvm_gsi_register_callback(v->domain, irq, &pt->eoi_cb);
+        if ( rc )
+            gdprintk(XENLOG_WARNING,
+                     "unable to register callback for timer GSI %u source %u: %d\n",
+                     irq, pt->source, rc);
+        break;
+    }
+
     pt->on_list = 1;
     list_add(&pt->list, &v->arch.hvm.tm_list);
 
@@ -554,6 +480,8 @@  void create_periodic_time(
 
 void destroy_periodic_time(struct periodic_time *pt)
 {
+    unsigned int gsi;
+
     /* Was this structure previously initialised by create_periodic_time()? */
     if ( pt->vcpu == NULL )
         return;
@@ -563,6 +491,17 @@  void destroy_periodic_time(struct periodic_time *pt)
         list_del(&pt->list);
     pt->on_list = 0;
     pt->pending_intr_nr = 0;
+
+    gsi = pt->irq;
+    switch ( pt->source )
+    {
+    case PTSRC_isa:
+        gsi = hvm_isa_irq_to_gsi(pt->irq);
+        /* fallthrough */
+    case PTSRC_ioapic:
+        hvm_gsi_unregister_callback(pt->vcpu->domain, gsi, &pt->eoi_cb);
+        break;
+    }
     pt_unlock(pt);
 
     /*
@@ -617,20 +556,29 @@  void pt_adjust_global_vcpu_target(struct vcpu *v)
     write_unlock(&pl_time->vhpet.lock);
 }
 
-
 static void pt_resume(struct periodic_time *pt)
 {
+    struct vcpu *v;
+    time_cb *cb = NULL;
+    void *cb_priv;
+
     if ( pt->vcpu == NULL )
         return;
 
     pt_lock(pt);
-    if ( pt->pending_intr_nr && !pt->on_list )
+    if ( pt->pending_intr_nr && !pt->on_list && inject_interrupt(pt) )
     {
+        pt->pending_intr_nr--;
+        cb = pt->cb;
+        cb_priv = pt->priv;
+        v = pt->vcpu;
         pt->on_list = 1;
         list_add(&pt->list, &pt->vcpu->arch.hvm.tm_list);
-        vcpu_kick(pt->vcpu);
     }
     pt_unlock(pt);
+
+    if ( cb )
+        cb(v, cb_priv);
 }
 
 void pt_may_unmask_irq(struct domain *d, struct periodic_time *vlapic_pt)
diff --git a/xen/include/asm-x86/hvm/vpt.h b/xen/include/asm-x86/hvm/vpt.h
index 39d26cbda49..9440fe4ac7d 100644
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -23,6 +23,7 @@ 
 #include <xen/list.h>
 #include <xen/rwlock.h>
 #include <asm/hvm/hvm.h>
+#include <asm/hvm/irq.h>
 
 /*
  * Abstract layer of periodic time, one short time.
@@ -50,6 +51,7 @@  struct periodic_time {
     struct timer timer;         /* ac_timer */
     time_cb *cb;
     void *priv;                 /* point back to platform time source */
+    struct hvm_gsi_eoi_callback eoi_cb; /* EOI callback registration data */
 };
 
 
@@ -145,9 +147,6 @@  struct pl_time {    /* platform time */
 
 void pt_save_timer(struct vcpu *v);
 void pt_restore_timer(struct vcpu *v);
-int pt_update_irq(struct vcpu *v);
-struct hvm_intack;
-void pt_intr_post(struct vcpu *v, struct hvm_intack intack);
 void pt_migrate(struct vcpu *v);
 
 void pt_adjust_global_vcpu_target(struct vcpu *v);