diff mbox

[RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.

Message ID 20090824120623.GC30093@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov Aug. 24, 2009, 12:06 p.m. UTC
Use return value from kvm_set_irq() to track coalesced PIT interrupts
instead of ack/mask notifiers.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Marcelo Tosatti Aug. 24, 2009, 4:32 p.m. UTC | #1
On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote:
> Use return value from kvm_set_irq() to track coalesced PIT interrupts
> instead of ack/mask notifiers.

Gleb,

What is the advantage of doing so?

Ack notifiers are asynchronous notifications. Using the return value
from kvm_set_irq implies that timer emulation is based on a "tick
generating device" on the host side.

What I mean is that the ack notifications are useful, since they are
asynchronous.

Supposing your goal is to get rid of ack notifiers, due to their burden 
in irqchip code?

> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> index b857ca3..0b63991 100644
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
>  
> -	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
> -		return atomic_read(&pit->pit_state.pit_timer.pending);
> -	return 0;
> -}
> -
> -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
> -{
> -	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
> -						 irq_ack_notifier);
> -	spin_lock(&ps->inject_lock);
> -	if (atomic_dec_return(&ps->pit_timer.pending) < 0)
> -		atomic_inc(&ps->pit_timer.pending);
> -	ps->irq_ack = 1;
> -	spin_unlock(&ps->inject_lock);
> +	return atomic_read(&pit->pit_state.pit_timer.pending);
>  }
>  
>  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
> @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
>  	pt->vcpu = pt->kvm->bsp_vcpu;
>  
>  	atomic_set(&pt->pending, 0);
> -	ps->irq_ack = 1;
>  
>  	hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
>  		      HRTIMER_MODE_ABS);
> @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit)
>  	mutex_unlock(&pit->pit_state.lock);
>  
>  	atomic_set(&pit->pit_state.pit_timer.pending, 0);
> -	pit->pit_state.irq_ack = 1;
> -}
> -
> -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
> -{
> -	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
> -
> -	if (!mask) {
> -		atomic_set(&pit->pit_state.pit_timer.pending, 0);
> -		pit->pit_state.irq_ack = 1;
> -	}
>  }
>  
>  static const struct kvm_io_device_ops pit_dev_ops = {
> @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
>  
>  	mutex_init(&pit->pit_state.lock);
>  	mutex_lock(&pit->pit_state.lock);
> -	spin_lock_init(&pit->pit_state.inject_lock);
>  
>  	kvm->arch.vpit = pit;
>  	pit->kvm = kvm;
> @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
>  	pit_state->pit = pit;
>  	hrtimer_init(&pit_state->pit_timer.timer,
>  		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
> -	pit_state->irq_ack_notifier.gsi = 0;
> -	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
> -	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
>  	pit_state->pit_timer.reinject = true;
>  	mutex_unlock(&pit->pit_state.lock);
>  
>  	kvm_pit_reset(pit);
>  
> -	pit->mask_notifier.func = pit_mask_notifer;
> -	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
> -
>  	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
>  	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
>  	if (ret < 0)
> @@ -670,10 +638,6 @@ void kvm_free_pit(struct kvm *kvm)
>  	struct hrtimer *timer;
>  
>  	if (kvm->arch.vpit) {
> -		kvm_unregister_irq_mask_notifier(kvm, 0,
> -					       &kvm->arch.vpit->mask_notifier);
> -		kvm_unregister_irq_ack_notifier(kvm,
> -				&kvm->arch.vpit->pit_state.irq_ack_notifier);
>  		mutex_lock(&kvm->arch.vpit->pit_state.lock);
>  		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
>  		hrtimer_cancel(timer);
> @@ -683,12 +647,12 @@ void kvm_free_pit(struct kvm *kvm)
>  	}
>  }
>  
> -static void __inject_pit_timer_intr(struct kvm *kvm)
> +static int __inject_pit_timer_intr(struct kvm *kvm)
>  {
>  	struct kvm_vcpu *vcpu;
> -	int i;
> +	int i, r;
>  
> -	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
> +	r = kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
>  	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
>  
>  	/*
> @@ -703,6 +667,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
>  	if (kvm->arch.vapics_in_nmi_mode > 0)
>  		kvm_for_each_vcpu(i, vcpu, kvm)
>  			kvm_apic_nmi_wd_deliver(vcpu);
> +
> +	return r;
>  }
>  
>  void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
> @@ -711,20 +677,14 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
>  	struct kvm *kvm = vcpu->kvm;
>  	struct kvm_kpit_state *ps;
>  
> -	if (pit) {
> -		int inject = 0;
> -		ps = &pit->pit_state;
> -
> -		/* Try to inject pending interrupts when
> -		 * last one has been acked.
> -		 */
> -		spin_lock(&ps->inject_lock);
> -		if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
> -			ps->irq_ack = 0;
> -			inject = 1;
> -		}
> -		spin_unlock(&ps->inject_lock);
> -		if (inject)
> -			__inject_pit_timer_intr(kvm);
> -	}
> +	if (!pit)
> +		return;
> +
> +	ps = &pit->pit_state;
> +
> +	if (!atomic_read(&ps->pit_timer.pending))
> +		return;
> +
> +	if (__inject_pit_timer_intr(kvm))
> +		atomic_dec(&ps->pit_timer.pending);
>  }
> diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
> index d4c1c7f..19937da 100644
> --- a/arch/x86/kvm/i8254.h
> +++ b/arch/x86/kvm/i8254.h
> @@ -28,8 +28,6 @@ struct kvm_kpit_state {
>  	struct mutex lock;
>  	struct kvm_pit *pit;
>  	spinlock_t inject_lock;
> -	unsigned long irq_ack;
> -	struct kvm_irq_ack_notifier irq_ack_notifier;
>  };
>  
>  struct kvm_pit {
> @@ -39,7 +37,6 @@ struct kvm_pit {
>  	struct kvm *kvm;
>  	struct kvm_kpit_state pit_state;
>  	int irq_source_id;
> -	struct kvm_irq_mask_notifier mask_notifier;
>  };
>  
>  #define KVM_PIT_BASE_ADDRESS	    0x40
> --
> 			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Aug. 24, 2009, 5:16 p.m. UTC | #2
On Mon, Aug 24, 2009 at 01:32:56PM -0300, Marcelo Tosatti wrote:
> On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote:
> > Use return value from kvm_set_irq() to track coalesced PIT interrupts
> > instead of ack/mask notifiers.
> 
> Gleb,
> 
> What is the advantage of doing so?
> 
Current code very fragile and relies on hacks to work. Lets take calling
of ack notifiers on pic reset as an example. Why is it needed? It is
obviously wrong thing to do from assigned devices POV. Why ioapic calls
mask notifiers but pic doesn't?

Besides diffstat for the patch shows:
2 files changed, 16 insertions(+), 59 deletions(-)

43 lines less for the same functionality. Looks like clear win to me.

> Ack notifiers are asynchronous notifications. Using the return value
> from kvm_set_irq implies that timer emulation is based on a "tick
> generating device" on the host side.
No notification is needed in the first place. You know immediately
if injection fails or not. I don't see why "using return value from
kvm_set_irq implies that timer emulation is based on a "tick generating
device" on the host side"? What can you do with ack notifiers that can't
be done without?

> What I mean is that the ack notifications are useful, since they are
> asynchronous.
> 
What I mean is that no notification is needed at all since result is
known immediately.

> Supposing your goal is to get rid of ack notifiers, due to their burden 
> in irqchip code?
> 
Unfortunately to get rid of ack notifiers we need to get rid of assigned
devices. I will gladly do that, but I doubt Avi shares my enthusiasm.
The patch to remove mask notification already sits in my patch queue though.

> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> > index b857ca3..0b63991 100644
> > --- a/arch/x86/kvm/i8254.c
> > +++ b/arch/x86/kvm/i8254.c
> > @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
> >  {
> >  	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
> >  
> > -	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
> > -		return atomic_read(&pit->pit_state.pit_timer.pending);
> > -	return 0;
> > -}
> > -
> > -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
> > -{
> > -	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
> > -						 irq_ack_notifier);
> > -	spin_lock(&ps->inject_lock);
> > -	if (atomic_dec_return(&ps->pit_timer.pending) < 0)
> > -		atomic_inc(&ps->pit_timer.pending);
> > -	ps->irq_ack = 1;
> > -	spin_unlock(&ps->inject_lock);
> > +	return atomic_read(&pit->pit_state.pit_timer.pending);
> >  }
> >  
> >  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
> > @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
> >  	pt->vcpu = pt->kvm->bsp_vcpu;
> >  
> >  	atomic_set(&pt->pending, 0);
> > -	ps->irq_ack = 1;
> >  
> >  	hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
> >  		      HRTIMER_MODE_ABS);
> > @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit)
> >  	mutex_unlock(&pit->pit_state.lock);
> >  
> >  	atomic_set(&pit->pit_state.pit_timer.pending, 0);
> > -	pit->pit_state.irq_ack = 1;
> > -}
> > -
> > -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
> > -{
> > -	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
> > -
> > -	if (!mask) {
> > -		atomic_set(&pit->pit_state.pit_timer.pending, 0);
> > -		pit->pit_state.irq_ack = 1;
> > -	}
> >  }
> >  
> >  static const struct kvm_io_device_ops pit_dev_ops = {
> > @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> >  
> >  	mutex_init(&pit->pit_state.lock);
> >  	mutex_lock(&pit->pit_state.lock);
> > -	spin_lock_init(&pit->pit_state.inject_lock);
> >  
> >  	kvm->arch.vpit = pit;
> >  	pit->kvm = kvm;
> > @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> >  	pit_state->pit = pit;
> >  	hrtimer_init(&pit_state->pit_timer.timer,
> >  		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
> > -	pit_state->irq_ack_notifier.gsi = 0;
> > -	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
> > -	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
> >  	pit_state->pit_timer.reinject = true;
> >  	mutex_unlock(&pit->pit_state.lock);
> >  
> >  	kvm_pit_reset(pit);
> >  
> > -	pit->mask_notifier.func = pit_mask_notifer;
> > -	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
> > -
> >  	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
> >  	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
> >  	if (ret < 0)
> > @@ -670,10 +638,6 @@ void kvm_free_pit(struct kvm *kvm)
> >  	struct hrtimer *timer;
> >  
> >  	if (kvm->arch.vpit) {
> > -		kvm_unregister_irq_mask_notifier(kvm, 0,
> > -					       &kvm->arch.vpit->mask_notifier);
> > -		kvm_unregister_irq_ack_notifier(kvm,
> > -				&kvm->arch.vpit->pit_state.irq_ack_notifier);
> >  		mutex_lock(&kvm->arch.vpit->pit_state.lock);
> >  		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
> >  		hrtimer_cancel(timer);
> > @@ -683,12 +647,12 @@ void kvm_free_pit(struct kvm *kvm)
> >  	}
> >  }
> >  
> > -static void __inject_pit_timer_intr(struct kvm *kvm)
> > +static int __inject_pit_timer_intr(struct kvm *kvm)
> >  {
> >  	struct kvm_vcpu *vcpu;
> > -	int i;
> > +	int i, r;
> >  
> > -	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
> > +	r = kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
> >  	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
> >  
> >  	/*
> > @@ -703,6 +667,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
> >  	if (kvm->arch.vapics_in_nmi_mode > 0)
> >  		kvm_for_each_vcpu(i, vcpu, kvm)
> >  			kvm_apic_nmi_wd_deliver(vcpu);
> > +
> > +	return r;
> >  }
> >  
> >  void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
> > @@ -711,20 +677,14 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
> >  	struct kvm *kvm = vcpu->kvm;
> >  	struct kvm_kpit_state *ps;
> >  
> > -	if (pit) {
> > -		int inject = 0;
> > -		ps = &pit->pit_state;
> > -
> > -		/* Try to inject pending interrupts when
> > -		 * last one has been acked.
> > -		 */
> > -		spin_lock(&ps->inject_lock);
> > -		if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
> > -			ps->irq_ack = 0;
> > -			inject = 1;
> > -		}
> > -		spin_unlock(&ps->inject_lock);
> > -		if (inject)
> > -			__inject_pit_timer_intr(kvm);
> > -	}
> > +	if (!pit)
> > +		return;
> > +
> > +	ps = &pit->pit_state;
> > +
> > +	if (!atomic_read(&ps->pit_timer.pending))
> > +		return;
> > +
> > +	if (__inject_pit_timer_intr(kvm))
> > +		atomic_dec(&ps->pit_timer.pending);
> >  }
> > diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
> > index d4c1c7f..19937da 100644
> > --- a/arch/x86/kvm/i8254.h
> > +++ b/arch/x86/kvm/i8254.h
> > @@ -28,8 +28,6 @@ struct kvm_kpit_state {
> >  	struct mutex lock;
> >  	struct kvm_pit *pit;
> >  	spinlock_t inject_lock;
> > -	unsigned long irq_ack;
> > -	struct kvm_irq_ack_notifier irq_ack_notifier;
> >  };
> >  
> >  struct kvm_pit {
> > @@ -39,7 +37,6 @@ struct kvm_pit {
> >  	struct kvm *kvm;
> >  	struct kvm_kpit_state pit_state;
> >  	int irq_source_id;
> > -	struct kvm_irq_mask_notifier mask_notifier;
> >  };
> >  
> >  #define KVM_PIT_BASE_ADDRESS	    0x40
> > --
> > 			Gleb.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Aug. 24, 2009, 5:44 p.m. UTC | #3
On Mon, Aug 24, 2009 at 08:16:46PM +0300, Gleb Natapov wrote:
> On Mon, Aug 24, 2009 at 01:32:56PM -0300, Marcelo Tosatti wrote:
> > On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote:
> > > Use return value from kvm_set_irq() to track coalesced PIT interrupts
> > > instead of ack/mask notifiers.
> > 
> > Gleb,
> > 
> > What is the advantage of doing so?
> > 
> Current code very fragile and relies on hacks to work. Lets take calling
> of ack notifiers on pic reset as an example. Why is it needed? 

To signal the ack notifiers users that, in case of reset with pending
IRR, the given interrupt has been "acked" (its an artificial ack event).

Is there a need to differentiate between actual interrupt ack and reset
with pending IRR? At the time this code was written, there was no
indication that differentation would be necessary.

> It is obviously wrong thing to do from assigned devices POV.

Thats not entirely clear to me. So what happens if a guest with PIC
assigned device resets with a pending IRR? The host interrupt line will
be kept disabled, even though the guest is able to process further
interrupts?

> Why ioapic calls mask notifiers but pic doesn't?

Because it is not implemented.

> Besides diffstat for the patch shows:
> 2 files changed, 16 insertions(+), 59 deletions(-)
> 
> 43 lines less for the same functionality. Looks like clear win to me.
> 
> > Ack notifiers are asynchronous notifications. Using the return value
> > from kvm_set_irq implies that timer emulation is based on a "tick
> > generating device" on the host side.
> No notification is needed in the first place. You know immediately
> if injection fails or not. I don't see why "using return value from
> kvm_set_irq implies that timer emulation is based on a "tick generating
> device" on the host side"? What can you do with ack notifiers that can't
> be done without?

If you don't have a host timer emulating the guest PIT, to periodically
bang on kvm_set_irq, how do you know when to attempt reinjection?

You keep calling kvm_set_irq on every guest entry to figure out when 
reinjection is possible?

> > What I mean is that the ack notifications are useful, since they are
> > asynchronous.
> > 
> What I mean is that no notification is needed at all since result is
> known immediately.
?
> > Supposing your goal is to get rid of ack notifiers, due to their burden 
> > in irqchip code?
> > 
> Unfortunately to get rid of ack notifiers we need to get rid of assigned
> devices. I will gladly do that, but I doubt Avi shares my enthusiasm.
> The patch to remove mask notification already sits in my patch queue though.
> 
> > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> > > index b857ca3..0b63991 100644
> > > --- a/arch/x86/kvm/i8254.c
> > > +++ b/arch/x86/kvm/i8254.c
> > > @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
> > >  {
> > >  	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
> > >  
> > > -	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
> > > -		return atomic_read(&pit->pit_state.pit_timer.pending);
> > > -	return 0;
> > > -}
> > > -
> > > -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
> > > -{
> > > -	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
> > > -						 irq_ack_notifier);
> > > -	spin_lock(&ps->inject_lock);
> > > -	if (atomic_dec_return(&ps->pit_timer.pending) < 0)
> > > -		atomic_inc(&ps->pit_timer.pending);
> > > -	ps->irq_ack = 1;
> > > -	spin_unlock(&ps->inject_lock);
> > > +	return atomic_read(&pit->pit_state.pit_timer.pending);
> > >  }
> > >  
> > >  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
> > > @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
> > >  	pt->vcpu = pt->kvm->bsp_vcpu;
> > >  
> > >  	atomic_set(&pt->pending, 0);
> > > -	ps->irq_ack = 1;
> > >  
> > >  	hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
> > >  		      HRTIMER_MODE_ABS);
> > > @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit)
> > >  	mutex_unlock(&pit->pit_state.lock);
> > >  
> > >  	atomic_set(&pit->pit_state.pit_timer.pending, 0);
> > > -	pit->pit_state.irq_ack = 1;
> > > -}
> > > -
> > > -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
> > > -{
> > > -	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
> > > -
> > > -	if (!mask) {
> > > -		atomic_set(&pit->pit_state.pit_timer.pending, 0);
> > > -		pit->pit_state.irq_ack = 1;
> > > -	}
> > >  }
> > >  
> > >  static const struct kvm_io_device_ops pit_dev_ops = {
> > > @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> > >  
> > >  	mutex_init(&pit->pit_state.lock);
> > >  	mutex_lock(&pit->pit_state.lock);
> > > -	spin_lock_init(&pit->pit_state.inject_lock);
> > >  
> > >  	kvm->arch.vpit = pit;
> > >  	pit->kvm = kvm;
> > > @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> > >  	pit_state->pit = pit;
> > >  	hrtimer_init(&pit_state->pit_timer.timer,
> > >  		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
> > > -	pit_state->irq_ack_notifier.gsi = 0;
> > > -	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
> > > -	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
> > >  	pit_state->pit_timer.reinject = true;
> > >  	mutex_unlock(&pit->pit_state.lock);
> > >  
> > >  	kvm_pit_reset(pit);
> > >  
> > > -	pit->mask_notifier.func = pit_mask_notifer;
> > > -	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
> > > -
> > >  	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
> > >  	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
> > >  	if (ret < 0)
> > > @@ -670,10 +638,6 @@ void kvm_free_pit(struct kvm *kvm)
> > >  	struct hrtimer *timer;
> > >  
> > >  	if (kvm->arch.vpit) {
> > > -		kvm_unregister_irq_mask_notifier(kvm, 0,
> > > -					       &kvm->arch.vpit->mask_notifier);
> > > -		kvm_unregister_irq_ack_notifier(kvm,
> > > -				&kvm->arch.vpit->pit_state.irq_ack_notifier);
> > >  		mutex_lock(&kvm->arch.vpit->pit_state.lock);
> > >  		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
> > >  		hrtimer_cancel(timer);
> > > @@ -683,12 +647,12 @@ void kvm_free_pit(struct kvm *kvm)
> > >  	}
> > >  }
> > >  
> > > -static void __inject_pit_timer_intr(struct kvm *kvm)
> > > +static int __inject_pit_timer_intr(struct kvm *kvm)
> > >  {
> > >  	struct kvm_vcpu *vcpu;
> > > -	int i;
> > > +	int i, r;
> > >  
> > > -	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
> > > +	r = kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
> > >  	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
> > >  
> > >  	/*
> > > @@ -703,6 +667,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
> > >  	if (kvm->arch.vapics_in_nmi_mode > 0)
> > >  		kvm_for_each_vcpu(i, vcpu, kvm)
> > >  			kvm_apic_nmi_wd_deliver(vcpu);
> > > +
> > > +	return r;
> > >  }
> > >  
> > >  void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
> > > @@ -711,20 +677,14 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
> > >  	struct kvm *kvm = vcpu->kvm;
> > >  	struct kvm_kpit_state *ps;
> > >  
> > > -	if (pit) {
> > > -		int inject = 0;
> > > -		ps = &pit->pit_state;
> > > -
> > > -		/* Try to inject pending interrupts when
> > > -		 * last one has been acked.
> > > -		 */
> > > -		spin_lock(&ps->inject_lock);
> > > -		if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
> > > -			ps->irq_ack = 0;
> > > -			inject = 1;
> > > -		}
> > > -		spin_unlock(&ps->inject_lock);
> > > -		if (inject)
> > > -			__inject_pit_timer_intr(kvm);
> > > -	}
> > > +	if (!pit)
> > > +		return;
> > > +
> > > +	ps = &pit->pit_state;
> > > +
> > > +	if (!atomic_read(&ps->pit_timer.pending))
> > > +		return;
> > > +
> > > +	if (__inject_pit_timer_intr(kvm))
> > > +		atomic_dec(&ps->pit_timer.pending);
> > >  }
> > > diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
> > > index d4c1c7f..19937da 100644
> > > --- a/arch/x86/kvm/i8254.h
> > > +++ b/arch/x86/kvm/i8254.h
> > > @@ -28,8 +28,6 @@ struct kvm_kpit_state {
> > >  	struct mutex lock;
> > >  	struct kvm_pit *pit;
> > >  	spinlock_t inject_lock;
> > > -	unsigned long irq_ack;
> > > -	struct kvm_irq_ack_notifier irq_ack_notifier;
> > >  };
> > >  
> > >  struct kvm_pit {
> > > @@ -39,7 +37,6 @@ struct kvm_pit {
> > >  	struct kvm *kvm;
> > >  	struct kvm_kpit_state pit_state;
> > >  	int irq_source_id;
> > > -	struct kvm_irq_mask_notifier mask_notifier;
> > >  };
> > >  
> > >  #define KVM_PIT_BASE_ADDRESS	    0x40
> > > --
> > > 			Gleb.
> 
> --
> 			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Aug. 24, 2009, 6:19 p.m. UTC | #4
On Mon, Aug 24, 2009 at 02:44:27PM -0300, Marcelo Tosatti wrote:
> On Mon, Aug 24, 2009 at 08:16:46PM +0300, Gleb Natapov wrote:
> > On Mon, Aug 24, 2009 at 01:32:56PM -0300, Marcelo Tosatti wrote:
> > > On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote:
> > > > Use return value from kvm_set_irq() to track coalesced PIT interrupts
> > > > instead of ack/mask notifiers.
> > > 
> > > Gleb,
> > > 
> > > What is the advantage of doing so?
> > > 
> > Current code very fragile and relies on hacks to work. Lets take calling
> > of ack notifiers on pic reset as an example. Why is it needed? 
> 
> To signal the ack notifiers users that, in case of reset with pending
> IRR, the given interrupt has been "acked" (its an artificial ack event).
> 
But IRR was not acked. The reason it is done is that otherwise the
current logic will prevent further interrupt injection. 

> Is there a need to differentiate between actual interrupt ack and reset
> with pending IRR? At the time this code was written, there was no
> indication that differentation would be necessary.
This is two different things. Ack notifiers should be called when guest
acks interrupt. Calling it on reset is wrong (see below). We can add reset
notifiers, but we just build yet another infrastructure to support
current reinjection scheme.

> 
> > It is obviously wrong thing to do from assigned devices POV.
> 
> Thats not entirely clear to me. So what happens if a guest with PIC
> assigned device resets with a pending IRR? The host interrupt line will
> be kept disabled, even though the guest is able to process further
> interrupts?
The host interrupt line will be enabled (assigned device ack notifier
does this) without clearing interrupt condition in assigned device
(guest hasn't acked irq so how can we be sure it ran device's irq
handler?). Host will hang.

> > Why ioapic calls mask notifiers but pic doesn't?
> 
> Because it is not implemented.
I see that. Why? Why it was important to implement for ioapic but not
for pic? Do we know what doesn't work now?

> 
> > Besides diffstat for the patch shows:
> > 2 files changed, 16 insertions(+), 59 deletions(-)
> > 
> > 43 lines less for the same functionality. Looks like clear win to me.
> > 
> > > Ack notifiers are asynchronous notifications. Using the return value
> > > from kvm_set_irq implies that timer emulation is based on a "tick
> > > generating device" on the host side.
> > No notification is needed in the first place. You know immediately
> > if injection fails or not. I don't see why "using return value from
> > kvm_set_irq implies that timer emulation is based on a "tick generating
> > device" on the host side"? What can you do with ack notifiers that can't
> > be done without?
> 
> If you don't have a host timer emulating the guest PIT, to periodically
> bang on kvm_set_irq, how do you know when to attempt reinjection?
> 
> You keep calling kvm_set_irq on every guest entry to figure out when 
> reinjection is possible?
If we have timer to inject then yes. It is relatively cheap. Most of the
time pending count will be zero.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Aug. 24, 2009, 7:01 p.m. UTC | #5
On Mon, Aug 24, 2009 at 09:19:05PM +0300, Gleb Natapov wrote:
> > > It is obviously wrong thing to do from assigned devices POV.
> > 
> > Thats not entirely clear to me. So what happens if a guest with PIC
> > assigned device resets with a pending IRR? The host interrupt line will
> > be kept disabled, even though the guest is able to process further
> > interrupts?
> The host interrupt line will be enabled (assigned device ack notifier
> does this) without clearing interrupt condition in assigned device
> (guest hasn't acked irq so how can we be sure it ran device's irq
> handler?). Host will hang.
> 
Actually, on the second thought, it will not hang. Next time host
interrupt handler runs it will disable interrupt once again.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Aug. 26, 2009, 12:43 p.m. UTC | #6
On Mon, Aug 24, 2009 at 09:19:05PM +0300, Gleb Natapov wrote:
> > > Current code very fragile and relies on hacks to work. Lets take calling
> > > of ack notifiers on pic reset as an example. Why is it needed? 
> > 
> > To signal the ack notifiers users that, in case of reset with pending
> > IRR, the given interrupt has been "acked" (its an artificial ack event).
> > 
> But IRR was not acked. The reason it is done is that otherwise the
> current logic will prevent further interrupt injection. 

Or will keep the host irq disabled, for the assigned device case (in
case you drop the hackish ack notification from pic_reset).

I don't think it exists there because of PIT reinjection only, it seems
a generic problem for users of ack notifiers (a reset notifier as you
mentioned would also do it, and be cleaner).

> > Is there a need to differentiate between actual interrupt ack and reset
> > with pending IRR? At the time this code was written, there was no
> > indication that differentation would be necessary.
> This is two different things. Ack notifiers should be called when guest
> acks interrupt. Calling it on reset is wrong (see below). We can add reset
> notifiers, but we just build yet another infrastructure to support
> current reinjection scheme.

Its not specific to PIT reinjection.

Anything that relies on ack notification to perform some action (either
reinjection or host irq line enablement or some other use) suffers from
the same thing.

You might argue that a separate reset notification is more appropriate.

> > > It is obviously wrong thing to do from assigned devices POV.
> > 
> > Thats not entirely clear to me. So what happens if a guest with PIC
> > assigned device resets with a pending IRR? The host interrupt line will
> > be kept disabled, even though the guest is able to process further
> > interrupts?
> The host interrupt line will be enabled (assigned device ack notifier
> does this) without clearing interrupt condition in assigned device
> (guest hasn't acked irq so how can we be sure it ran device's irq
> handler?). Host will hang.
> 
> > > Why ioapic calls mask notifiers but pic doesn't?
> > 
> > Because it is not implemented.
> I see that. Why? Why it was important to implement for ioapic but not
> for pic? 

4780c65904f0fc4e312ee2da9383eacbe04e61ea

> Do we know what doesn't work now?

What you mean?

> > > Besides diffstat for the patch shows:
> > > 2 files changed, 16 insertions(+), 59 deletions(-)
> > > 
> > > 43 lines less for the same functionality. Looks like clear win to me.
> > > 
> > > > Ack notifiers are asynchronous notifications. Using the return value
> > > > from kvm_set_irq implies that timer emulation is based on a "tick
> > > > generating device" on the host side.
> > > No notification is needed in the first place. You know immediately
> > > if injection fails or not. I don't see why "using return value from
> > > kvm_set_irq implies that timer emulation is based on a "tick generating
> > > device" on the host side"? What can you do with ack notifiers that can't
> > > be done without?
> > 
> > If you don't have a host timer emulating the guest PIT, to periodically
> > bang on kvm_set_irq, how do you know when to attempt reinjection?
> > 
> > You keep calling kvm_set_irq on every guest entry to figure out when 
> > reinjection is possible?
> If we have timer to inject then yes. It is relatively cheap. Most of the
> time pending count will be zero.

Won't work with non-tick-based emulation on the host.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Aug. 26, 2009, 12:45 p.m. UTC | #7
On Mon, Aug 24, 2009 at 10:01:50PM +0300, Gleb Natapov wrote:
> On Mon, Aug 24, 2009 at 09:19:05PM +0300, Gleb Natapov wrote:
> > > > It is obviously wrong thing to do from assigned devices POV.
> > > 
> > > Thats not entirely clear to me. So what happens if a guest with PIC
> > > assigned device resets with a pending IRR? The host interrupt line will
> > > be kept disabled, even though the guest is able to process further
> > > interrupts?
> > The host interrupt line will be enabled (assigned device ack notifier
> > does this) without clearing interrupt condition in assigned device
> > (guest hasn't acked irq so how can we be sure it ran device's irq
> > handler?). Host will hang.
> > 
> Actually, on the second thought, it will not hang. Next time host
> interrupt handler runs it will disable interrupt once again.

Right. And if you don't signal ack notification on reset with pending
IRR the host line will be kept disabled.

As said on other email, its not specific to PIT reinjection logic. 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Aug. 26, 2009, 1:19 p.m. UTC | #8
On Wed, Aug 26, 2009 at 09:43:48AM -0300, Marcelo Tosatti wrote:
> On Mon, Aug 24, 2009 at 09:19:05PM +0300, Gleb Natapov wrote:
> > > > Current code very fragile and relies on hacks to work. Lets take calling
> > > > of ack notifiers on pic reset as an example. Why is it needed? 
> > > 
> > > To signal the ack notifiers users that, in case of reset with pending
> > > IRR, the given interrupt has been "acked" (its an artificial ack event).
> > > 
> > But IRR was not acked. The reason it is done is that otherwise the
> > current logic will prevent further interrupt injection. 
> 
> Or will keep the host irq disabled, for the assigned device case (in
> case you drop the hackish ack notification from pic_reset).
> 
> I don't think it exists there because of PIT reinjection only, it seems
> a generic problem for users of ack notifiers (a reset notifier as you
> mentioned would also do it, and be cleaner).
> 
Yes, I agree pic reset should be propagated to assigned devices somehow.

> > > Is there a need to differentiate between actual interrupt ack and reset
> > > with pending IRR? At the time this code was written, there was no
> > > indication that differentation would be necessary.
> > This is two different things. Ack notifiers should be called when guest
> > acks interrupt. Calling it on reset is wrong (see below). We can add reset
> > notifiers, but we just build yet another infrastructure to support
> > current reinjection scheme.
> 
> Its not specific to PIT reinjection.
> 
> Anything that relies on ack notification to perform some action (either
> reinjection or host irq line enablement or some other use) suffers from
> the same thing.
> 
> You might argue that a separate reset notification is more appropriate.
> 
> > > > It is obviously wrong thing to do from assigned devices POV.
> > > 
> > > Thats not entirely clear to me. So what happens if a guest with PIC
> > > assigned device resets with a pending IRR? The host interrupt line will
> > > be kept disabled, even though the guest is able to process further
> > > interrupts?
> > The host interrupt line will be enabled (assigned device ack notifier
> > does this) without clearing interrupt condition in assigned device
> > (guest hasn't acked irq so how can we be sure it ran device's irq
> > handler?). Host will hang.
> > 
> > > > Why ioapic calls mask notifiers but pic doesn't?
> > > 
> > > Because it is not implemented.
> > I see that. Why? Why it was important to implement for ioapic but not
> > for pic? 
> 
> 4780c65904f0fc4e312ee2da9383eacbe04e61ea
> 
This commit and previous one adds infrastructure to fix a bug that is
there only because how we choose to do pit reinjection. Do it differently
and you can revert both of them.

> > Do we know what doesn't work now?
> 
> What you mean?
I mean that pit doesn't call mask notifier so similar bug to 4780c65
hides somewhere out there. How can we test it?

> 
> > > > Besides diffstat for the patch shows:
> > > > 2 files changed, 16 insertions(+), 59 deletions(-)
> > > > 
> > > > 43 lines less for the same functionality. Looks like clear win to me.
> > > > 
> > > > > Ack notifiers are asynchronous notifications. Using the return value
> > > > > from kvm_set_irq implies that timer emulation is based on a "tick
> > > > > generating device" on the host side.
> > > > No notification is needed in the first place. You know immediately
> > > > if injection fails or not. I don't see why "using return value from
> > > > kvm_set_irq implies that timer emulation is based on a "tick generating
> > > > device" on the host side"? What can you do with ack notifiers that can't
> > > > be done without?
> > > 
> > > If you don't have a host timer emulating the guest PIT, to periodically
> > > bang on kvm_set_irq, how do you know when to attempt reinjection?
> > > 
> > > You keep calling kvm_set_irq on every guest entry to figure out when 
> > > reinjection is possible?
> > If we have timer to inject then yes. It is relatively cheap. Most of the
> > time pending count will be zero.
> 
> Won't work with non-tick-based emulation on the host.
Why? This is the most important point, can you elaborate?

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Aug. 26, 2009, 1:38 p.m. UTC | #9
On Wed, Aug 26, 2009 at 04:19:17PM +0300, Gleb Natapov wrote:
> On Wed, Aug 26, 2009 at 09:43:48AM -0300, Marcelo Tosatti wrote:
> > On Mon, Aug 24, 2009 at 09:19:05PM +0300, Gleb Natapov wrote:
> > > > > Current code very fragile and relies on hacks to work. Lets take calling
> > > > > of ack notifiers on pic reset as an example. Why is it needed? 
> > > > 
> > > > To signal the ack notifiers users that, in case of reset with pending
> > > > IRR, the given interrupt has been "acked" (its an artificial ack event).
> > > > 
> > > But IRR was not acked. The reason it is done is that otherwise the
> > > current logic will prevent further interrupt injection. 
> > 
> > Or will keep the host irq disabled, for the assigned device case (in
> > case you drop the hackish ack notification from pic_reset).
> > 
> > I don't think it exists there because of PIT reinjection only, it seems
> > a generic problem for users of ack notifiers (a reset notifier as you
> > mentioned would also do it, and be cleaner).
> > 
> Yes, I agree pic reset should be propagated to assigned devices somehow.
> 
> > > > Is there a need to differentiate between actual interrupt ack and reset
> > > > with pending IRR? At the time this code was written, there was no
> > > > indication that differentation would be necessary.
> > > This is two different things. Ack notifiers should be called when guest
> > > acks interrupt. Calling it on reset is wrong (see below). We can add reset
> > > notifiers, but we just build yet another infrastructure to support
> > > current reinjection scheme.
> > 
> > Its not specific to PIT reinjection.
> > 
> > Anything that relies on ack notification to perform some action (either
> > reinjection or host irq line enablement or some other use) suffers from
> > the same thing.
> > 
> > You might argue that a separate reset notification is more appropriate.
> > 
> > > > > It is obviously wrong thing to do from assigned devices POV.
> > > > 
> > > > Thats not entirely clear to me. So what happens if a guest with PIC
> > > > assigned device resets with a pending IRR? The host interrupt line will
> > > > be kept disabled, even though the guest is able to process further
> > > > interrupts?
> > > The host interrupt line will be enabled (assigned device ack notifier
> > > does this) without clearing interrupt condition in assigned device
> > > (guest hasn't acked irq so how can we be sure it ran device's irq
> > > handler?). Host will hang.
> > > 
> > > > > Why ioapic calls mask notifiers but pic doesn't?
> > > > 
> > > > Because it is not implemented.
> > > I see that. Why? Why it was important to implement for ioapic but not
> > > for pic? 
> > 
> > 4780c65904f0fc4e312ee2da9383eacbe04e61ea
> > 
> This commit and previous one adds infrastructure to fix a bug that is
> there only because how we choose to do pit reinjection. Do it differently
> and you can revert both of them.
> 
> > > Do we know what doesn't work now?
> > 
> > What you mean?
> I mean that pit doesn't call mask notifier so similar bug to 4780c65
> hides somewhere out there. How can we test it?
> 
> > 
> > > > > Besides diffstat for the patch shows:
> > > > > 2 files changed, 16 insertions(+), 59 deletions(-)
> > > > > 
> > > > > 43 lines less for the same functionality. Looks like clear win to me.
> > > > > 
> > > > > > Ack notifiers are asynchronous notifications. Using the return value
> > > > > > from kvm_set_irq implies that timer emulation is based on a "tick
> > > > > > generating device" on the host side.
> > > > > No notification is needed in the first place. You know immediately
> > > > > if injection fails or not. I don't see why "using return value from
> > > > > kvm_set_irq implies that timer emulation is based on a "tick generating
> > > > > device" on the host side"? What can you do with ack notifiers that can't
> > > > > be done without?
> > > > 
> > > > If you don't have a host timer emulating the guest PIT, to periodically
> > > > bang on kvm_set_irq, how do you know when to attempt reinjection?
> > > > 
> > > > You keep calling kvm_set_irq on every guest entry to figure out when 
> > > > reinjection is possible?
> > > If we have timer to inject then yes. It is relatively cheap. Most of the
> > > time pending count will be zero.
> > 
> > Won't work with non-tick-based emulation on the host.
> Why? This is the most important point, can you elaborate?

From http://www.mail-archive.com/kvm@vger.kernel.org/msg18644.html.

An injectable timer interrupt is defined by:

- time(now) >= time(next_expiration)
- Previous timer interrupt has been acked (thus we can inject).

The thing is, sure you can drop ack notifiers and check IRR
on every guest entry, but why bother if you can receive an
asynchronous notification?

Would you prefer to replace

+               if (!ktimer->can_inject)

With
		kvm_set_irq()

?

Not relatively cheap.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Aug. 26, 2009, 1:48 p.m. UTC | #10
On 08/26/2009 04:38 PM, Marcelo Tosatti wrote:
>
> An injectable timer interrupt is defined by:
>
> - time(now)>= time(next_expiration)
> - Previous timer interrupt has been acked (thus we can inject).
>
> The thing is, sure you can drop ack notifiers and check IRR
> on every guest entry, but why bother if you can receive an
> asynchronous notification?
>
> Would you prefer to replace
>
> +               if (!ktimer->can_inject)
>
> With
> 		kvm_set_irq()
>
> ?
>
> Not relatively cheap.
>    

Well, we expect it to be a rare condition that we have pending timer 
interrupts, so if it leads to significant code simplification, it can be 
worthwhile.
Marcelo Tosatti Aug. 26, 2009, 1:49 p.m. UTC | #11
On Wed, Aug 26, 2009 at 04:19:17PM +0300, Gleb Natapov wrote:
> On Wed, Aug 26, 2009 at 09:43:48AM -0300, Marcelo Tosatti wrote:
> > On Mon, Aug 24, 2009 at 09:19:05PM +0300, Gleb Natapov wrote:
> > > > > Current code very fragile and relies on hacks to work. Lets take calling
> > > > > of ack notifiers on pic reset as an example. Why is it needed? 
> > > > 
> > > > To signal the ack notifiers users that, in case of reset with pending
> > > > IRR, the given interrupt has been "acked" (its an artificial ack event).
> > > > 
> > > But IRR was not acked. The reason it is done is that otherwise the
> > > current logic will prevent further interrupt injection. 
> > 
> > Or will keep the host irq disabled, for the assigned device case (in
> > case you drop the hackish ack notification from pic_reset).
> > 
> > I don't think it exists there because of PIT reinjection only, it seems
> > a generic problem for users of ack notifiers (a reset notifier as you
> > mentioned would also do it, and be cleaner).
> > 
> Yes, I agree pic reset should be propagated to assigned devices somehow.
> 
> > > > Is there a need to differentiate between actual interrupt ack and reset
> > > > with pending IRR? At the time this code was written, there was no
> > > > indication that differentation would be necessary.
> > > This is two different things. Ack notifiers should be called when guest
> > > acks interrupt. Calling it on reset is wrong (see below). We can add reset
> > > notifiers, but we just build yet another infrastructure to support
> > > current reinjection scheme.
> > 
> > Its not specific to PIT reinjection.
> > 
> > Anything that relies on ack notification to perform some action (either
> > reinjection or host irq line enablement or some other use) suffers from
> > the same thing.
> > 
> > You might argue that a separate reset notification is more appropriate.
> > 
> > > > > It is obviously wrong thing to do from assigned devices POV.
> > > > 
> > > > Thats not entirely clear to me. So what happens if a guest with PIC
> > > > assigned device resets with a pending IRR? The host interrupt line will
> > > > be kept disabled, even though the guest is able to process further
> > > > interrupts?
> > > The host interrupt line will be enabled (assigned device ack notifier
> > > does this) without clearing interrupt condition in assigned device
> > > (guest hasn't acked irq so how can we be sure it ran device's irq
> > > handler?). Host will hang.
> > > 
> > > > > Why ioapic calls mask notifiers but pic doesn't?
> > > > 
> > > > Because it is not implemented.
> > > I see that. Why? Why it was important to implement for ioapic but not
> > > for pic? 
> > 
> > 4780c65904f0fc4e312ee2da9383eacbe04e61ea
> > 
> This commit and previous one adds infrastructure to fix a bug that is
> there only because how we choose to do pit reinjection. Do it differently
> and you can revert both of them.
> 
> > > Do we know what doesn't work now?
> > 
> > What you mean?
> I mean that pit doesn't call mask notifier so similar bug to 4780c65
> hides somewhere out there. How can we test it?

Program periodic PIT, mask irq0, wait a while, unmask irq0 ?


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Aug. 26, 2009, 1:50 p.m. UTC | #12
On Wed, Aug 26, 2009 at 10:38:42AM -0300, Marcelo Tosatti wrote:
> > > > > If you don't have a host timer emulating the guest PIT, to periodically
> > > > > bang on kvm_set_irq, how do you know when to attempt reinjection?
> > > > > 
> > > > > You keep calling kvm_set_irq on every guest entry to figure out when 
> > > > > reinjection is possible?
> > > > If we have timer to inject then yes. It is relatively cheap. Most of the
> > > > time pending count will be zero.
> > > 
> > > Won't work with non-tick-based emulation on the host.
> > Why? This is the most important point, can you elaborate?
> 
> >From http://www.mail-archive.com/kvm@vger.kernel.org/msg18644.html.
> 
> An injectable timer interrupt is defined by:
> 
> - time(now) >= time(next_expiration)
> - Previous timer interrupt has been acked (thus we can inject).
> 
> The thing is, sure you can drop ack notifiers and check IRR
> on every guest entry, but why bother if you can receive an
> asynchronous notification?
> 
> Would you prefer to replace
> 
> +               if (!ktimer->can_inject)
> 
> With
> 		kvm_set_irq()
> 
> ?
> 
> Not relatively cheap.
Most of the times time(now) will be less then time(next_expiration) so
on most entries kvm_set_irq() will not be called at all. When interrupt
has to be injected I prefer to try to inject it ASAP. PIC and APIC
effectively have 2 element interrupt queue (irr/isr) so injection may
succeed even though ack was not yet received.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index b857ca3..0b63991 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -231,20 +231,7 @@  int pit_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 
-	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
-		return atomic_read(&pit->pit_state.pit_timer.pending);
-	return 0;
-}
-
-static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
-	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
-						 irq_ack_notifier);
-	spin_lock(&ps->inject_lock);
-	if (atomic_dec_return(&ps->pit_timer.pending) < 0)
-		atomic_inc(&ps->pit_timer.pending);
-	ps->irq_ack = 1;
-	spin_unlock(&ps->inject_lock);
+	return atomic_read(&pit->pit_state.pit_timer.pending);
 }
 
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -297,7 +284,6 @@  static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 	pt->vcpu = pt->kvm->bsp_vcpu;
 
 	atomic_set(&pt->pending, 0);
-	ps->irq_ack = 1;
 
 	hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
 		      HRTIMER_MODE_ABS);
@@ -577,17 +563,6 @@  void kvm_pit_reset(struct kvm_pit *pit)
 	mutex_unlock(&pit->pit_state.lock);
 
 	atomic_set(&pit->pit_state.pit_timer.pending, 0);
-	pit->pit_state.irq_ack = 1;
-}
-
-static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
-{
-	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
-
-	if (!mask) {
-		atomic_set(&pit->pit_state.pit_timer.pending, 0);
-		pit->pit_state.irq_ack = 1;
-	}
 }
 
 static const struct kvm_io_device_ops pit_dev_ops = {
@@ -619,7 +594,6 @@  struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
 	mutex_init(&pit->pit_state.lock);
 	mutex_lock(&pit->pit_state.lock);
-	spin_lock_init(&pit->pit_state.inject_lock);
 
 	kvm->arch.vpit = pit;
 	pit->kvm = kvm;
@@ -628,17 +602,11 @@  struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 	pit_state->pit = pit;
 	hrtimer_init(&pit_state->pit_timer.timer,
 		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	pit_state->irq_ack_notifier.gsi = 0;
-	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
-	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
 	pit_state->pit_timer.reinject = true;
 	mutex_unlock(&pit->pit_state.lock);
 
 	kvm_pit_reset(pit);
 
-	pit->mask_notifier.func = pit_mask_notifer;
-	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
-
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
 	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
 	if (ret < 0)
@@ -670,10 +638,6 @@  void kvm_free_pit(struct kvm *kvm)
 	struct hrtimer *timer;
 
 	if (kvm->arch.vpit) {
-		kvm_unregister_irq_mask_notifier(kvm, 0,
-					       &kvm->arch.vpit->mask_notifier);
-		kvm_unregister_irq_ack_notifier(kvm,
-				&kvm->arch.vpit->pit_state.irq_ack_notifier);
 		mutex_lock(&kvm->arch.vpit->pit_state.lock);
 		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
 		hrtimer_cancel(timer);
@@ -683,12 +647,12 @@  void kvm_free_pit(struct kvm *kvm)
 	}
 }
 
-static void __inject_pit_timer_intr(struct kvm *kvm)
+static int __inject_pit_timer_intr(struct kvm *kvm)
 {
 	struct kvm_vcpu *vcpu;
-	int i;
+	int i, r;
 
-	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
+	r = kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
 
 	/*
@@ -703,6 +667,8 @@  static void __inject_pit_timer_intr(struct kvm *kvm)
 	if (kvm->arch.vapics_in_nmi_mode > 0)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			kvm_apic_nmi_wd_deliver(vcpu);
+
+	return r;
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
@@ -711,20 +677,14 @@  void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_kpit_state *ps;
 
-	if (pit) {
-		int inject = 0;
-		ps = &pit->pit_state;
-
-		/* Try to inject pending interrupts when
-		 * last one has been acked.
-		 */
-		spin_lock(&ps->inject_lock);
-		if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
-			ps->irq_ack = 0;
-			inject = 1;
-		}
-		spin_unlock(&ps->inject_lock);
-		if (inject)
-			__inject_pit_timer_intr(kvm);
-	}
+	if (!pit)
+		return;
+
+	ps = &pit->pit_state;
+
+	if (!atomic_read(&ps->pit_timer.pending))
+		return;
+
+	if (__inject_pit_timer_intr(kvm))
+		atomic_dec(&ps->pit_timer.pending);
 }
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index d4c1c7f..19937da 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -28,8 +28,6 @@  struct kvm_kpit_state {
 	struct mutex lock;
 	struct kvm_pit *pit;
 	spinlock_t inject_lock;
-	unsigned long irq_ack;
-	struct kvm_irq_ack_notifier irq_ack_notifier;
 };
 
 struct kvm_pit {
@@ -39,7 +37,6 @@  struct kvm_pit {
 	struct kvm *kvm;
 	struct kvm_kpit_state pit_state;
 	int irq_source_id;
-	struct kvm_irq_mask_notifier mask_notifier;
 };
 
 #define KVM_PIT_BASE_ADDRESS	    0x40