diff mbox

[v6,5/5] KVM : VMX: Use posted interrupt to deliver virtual interrupt

Message ID 1363354271-25302-6-git-send-email-yang.z.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhang, Yang Z March 15, 2013, 1:31 p.m. UTC
From: Yang Zhang <yang.z.zhang@Intel.com>

If posted interrupt is avaliable, then uses it to inject virtual
interrupt to guest.

Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
---
 arch/x86/kvm/irq.c   |    3 ++-
 arch/x86/kvm/lapic.c |   16 +++++++++++++---
 arch/x86/kvm/lapic.h |    1 +
 arch/x86/kvm/vmx.c   |   11 +++++++++++
 arch/x86/kvm/x86.c   |    4 ++++
 5 files changed, 31 insertions(+), 4 deletions(-)

Comments

Gleb Natapov March 19, 2013, 8:54 a.m. UTC | #1
On Fri, Mar 15, 2013 at 09:31:11PM +0800, Yang Zhang wrote:
> From: Yang Zhang <yang.z.zhang@Intel.com>
> 
> If posted interrupt is avaliable, then uses it to inject virtual
> interrupt to guest.
> 
> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> ---
>  arch/x86/kvm/irq.c   |    3 ++-
>  arch/x86/kvm/lapic.c |   16 +++++++++++++---
>  arch/x86/kvm/lapic.h |    1 +
>  arch/x86/kvm/vmx.c   |   11 +++++++++++
>  arch/x86/kvm/x86.c   |    4 ++++
>  5 files changed, 31 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index 484bc87..5179988 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -81,7 +81,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>  	if (kvm_cpu_has_extint(v))
>  		return 1;
>  
> -	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
> +	return (kvm_apic_has_interrupt(v) != -1) ||
> +		kvm_hwapic_has_interrupt(v);
That's incorrect. kvm_cpu_has_interrupt() should return true only it
there is IRR suitable to be injected, not just any IRR.
kvm_apic_has_interrupt() should call kvm_apic_update_irr().

>  }
>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
>  
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index b3ea50e..46c7310 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -713,7 +713,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  		} else
>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
>  
> -		result = !apic_test_and_set_irr(vector, apic);
> +		result = 1;
> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector))
> +			result = !apic_test_and_set_irr(vector, apic);
> +
>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>  					  trig_mode, vector, !result);
>  		if (!result) {
> @@ -723,8 +726,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  			break;
>  		}
>  
> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
> -		kvm_vcpu_kick(vcpu);
> +		if (!kvm_x86_ops->vm_has_apicv(vcpu->kvm)) {
> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> +			kvm_vcpu_kick(vcpu);
> +		}
>  		break;
apicv code and non apicv code are completely different. What's the point
checking for apicv twice here?
Just do:

if (kvm_x86_ops->deliver_posted_interrupt)
	kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)
else {
	result = !apic_test_and_set_irr(vector, apic);
	kvm_make_request(KVM_REQ_EVENT, vcpu);
	kvm_vcpu_kick(vcpu);
}

And set kvm_x86_ops->deliver_posted_interrupt only if apicv is enabled.

Also rearrange patches so that APIC_TMR handling goes before posted
interrupt series.

>  
>  	case APIC_DM_REMRD:
> @@ -1604,6 +1609,11 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
>  	return highest_irr;
>  }
>  
> +bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu)
> +{
> +	return kvm_x86_ops->hwapic_has_interrupt(vcpu);
> +}
> +
>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
>  {
>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index e5327be..c6abc63 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -37,6 +37,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu);
>  void kvm_free_lapic(struct kvm_vcpu *vcpu);
>  
>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
> +bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
>  int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
>  void kvm_lapic_reset(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 0b5a8ae..48a2239 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3932,6 +3932,17 @@ static void vmx_posted_intr_clear_on(struct kvm_vcpu *vcpu)
>  	clear_bit(POSTED_INTR_ON, (unsigned long *)&vmx->pi_desc.u.control);
>  }
>  
> +/*
> + * Send interrupt to vcpu via posted interrupt way.
> + * Return false if posted interrupt is not supported and the caller will
> + * roll back to old way(via set vIRR).
> + * Return true if posted interrupt is avalialbe, the interrupt is set
> + * in pir(posted interrupt requests):
> + * 1. If target vcpu is running(non-root mode), send posted interrupt
> + * notification to vcpu and hardware will sync pir to vIRR atomically.
> + * 2. If target vcpu isn't running(root mode), kick it to pick up the
> + * interrupt from pir in next vmentry.
> + */
The comment should go into previous patch. Also I prefer to not check
for posted interrupt inside the callback, but set it to NULL instead.
This way we avoid calling a callback on a hot path needlessly.

>  static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0baa90d..0981100 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
>  				    struct kvm_lapic_state *s)
>  {
> +	kvm_x86_ops->sync_pir_to_irr(vcpu);
>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>  
>  	return 0;
> @@ -5699,6 +5700,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	}
>  
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> +		kvm_x86_ops->sync_pir_to_irr(vcpu);
>  		inject_pending_event(vcpu);
>  
>  		/* enable NMI/IRQ window open exits if needed */
> @@ -5741,6 +5743,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  
>  	local_irq_disable();
>  
> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> +
Why is this separate from pir_to_irr syncing?

>  	if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
>  	    || need_resched() || signal_pending(current)) {
>  		vcpu->mode = OUTSIDE_GUEST_MODE;
> -- 
> 1.7.1

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 19, 2013, 12:11 p.m. UTC | #2
Gleb Natapov wrote on 2013-03-19:
> On Fri, Mar 15, 2013 at 09:31:11PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <yang.z.zhang@Intel.com>
>> 
>> If posted interrupt is avaliable, then uses it to inject virtual
>> interrupt to guest.
>> 
>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>> ---
>>  arch/x86/kvm/irq.c   |    3 ++-
>>  arch/x86/kvm/lapic.c |   16 +++++++++++++---
>>  arch/x86/kvm/lapic.h |    1 +
>>  arch/x86/kvm/vmx.c   |   11 +++++++++++
>>  arch/x86/kvm/x86.c   |    4 ++++
>>  5 files changed, 31 insertions(+), 4 deletions(-)
>> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
>> index 484bc87..5179988 100644
>> --- a/arch/x86/kvm/irq.c
>> +++ b/arch/x86/kvm/irq.c
>> @@ -81,7 +81,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>>  	if (kvm_cpu_has_extint(v))
>>  		return 1;
>> -	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
>> +	return (kvm_apic_has_interrupt(v) != -1) ||
>> +		kvm_hwapic_has_interrupt(v);
> That's incorrect. kvm_cpu_has_interrupt() should return true only it
> there is IRR suitable to be injected, not just any IRR.
> kvm_apic_has_interrupt() should call kvm_apic_update_irr().
You are right.

>>  }
>>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index b3ea50e..46c7310 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -713,7 +713,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>>  		} else
>>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
>> -		result = !apic_test_and_set_irr(vector, apic);
>> +		result = 1;
>> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector))
>> +			result = !apic_test_and_set_irr(vector, apic);
>> +
>>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>>  					  trig_mode, vector, !result);
>>  		if (!result) {
>> @@ -723,8 +726,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> delivery_mode,
>>  			break;
>>  		}
>> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
>> -		kvm_vcpu_kick(vcpu);
>> +		if (!kvm_x86_ops->vm_has_apicv(vcpu->kvm)) {
>> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
>> +			kvm_vcpu_kick(vcpu);
>> +		}
>>  		break;
> apicv code and non apicv code are completely different. What's the point
> checking for apicv twice here?
> Just do:
> 
> if (kvm_x86_ops->deliver_posted_interrupt)
> 	kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)
> else {
> 	result = !apic_test_and_set_irr(vector, apic);
> 	kvm_make_request(KVM_REQ_EVENT, vcpu);
> 	kvm_vcpu_kick(vcpu);
> }
> 
> And set kvm_x86_ops->deliver_posted_interrupt only if apicv is enabled.
> 
> Also rearrange patches so that APIC_TMR handling goes before posted
> interrupt series.
Sure. 

>> 
>>  	case APIC_DM_REMRD: @@ -1604,6 +1609,11 @@ int
>>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 	return highest_irr; }
>> +bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu)
>> +{
>> +	return kvm_x86_ops->hwapic_has_interrupt(vcpu);
>> +}
>> +
>>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
>>  {
>>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>> index e5327be..c6abc63 100644
>> --- a/arch/x86/kvm/lapic.h
>> +++ b/arch/x86/kvm/lapic.h
>> @@ -37,6 +37,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu);
>>  void kvm_free_lapic(struct kvm_vcpu *vcpu);
>>  
>>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +bool
>>  kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu); int
>>  kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int
>>  kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); void
>>  kvm_lapic_reset(struct kvm_vcpu *vcpu);
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 0b5a8ae..48a2239 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -3932,6 +3932,17 @@ static void vmx_posted_intr_clear_on(struct
> kvm_vcpu *vcpu)
>>  	clear_bit(POSTED_INTR_ON, (unsigned long *)&vmx->pi_desc.u.control);
>>  }
>> +/*
>> + * Send interrupt to vcpu via posted interrupt way.
>> + * Return false if posted interrupt is not supported and the caller will
>> + * roll back to old way(via set vIRR).
>> + * Return true if posted interrupt is avalialbe, the interrupt is set
>> + * in pir(posted interrupt requests):
>> + * 1. If target vcpu is running(non-root mode), send posted interrupt
>> + * notification to vcpu and hardware will sync pir to vIRR atomically.
>> + * 2. If target vcpu isn't running(root mode), kick it to pick up the
>> + * interrupt from pir in next vmentry.
>> + */
> The comment should go into previous patch. Also I prefer to not check
> for posted interrupt inside the callback, but set it to NULL instead.
> This way we avoid calling a callback on a hot path needlessly.
It's make sense. So just follow the logic you mentioned above?
 
>>  static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
>>  {
>>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 0baa90d..0981100 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 				   
>>  struct kvm_lapic_state *s) { +	kvm_x86_ops->sync_pir_to_irr(vcpu);
>>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>>  
>>  	return 0; @@ -5699,6 +5700,7 @@ static int vcpu_enter_guest(struct
>>  kvm_vcpu *vcpu) 	}
>>  
>>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>>  +		kvm_x86_ops->sync_pir_to_irr(vcpu); 		inject_pending_event(vcpu);
>>  
>>  		/* enable NMI/IRQ window open exits if needed */
>> @@ -5741,6 +5743,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>> 
>>  	local_irq_disable();
>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
>> +
> Why is this separate from pir_to_irr syncing?
This is the result of discussion with Marcelo.
It is more reasonable to put it here to avoid unnecessary posted interrupt between:

vcpu->mode = IN_GUEST_MODE;

<--interrupt may arrived here and this is unnecessary.

local_irq_disable();

>>  	if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
>>  	    || need_resched() || signal_pending(current)) {
>>  		vcpu->mode = OUTSIDE_GUEST_MODE;
>> --
>> 1.7.1
> 
> --
> 			Gleb.


Best regards,
Yang

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 19, 2013, 12:23 p.m. UTC | #3
On Tue, Mar 19, 2013 at 12:11:47PM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-03-19:
> > On Fri, Mar 15, 2013 at 09:31:11PM +0800, Yang Zhang wrote:
> >> From: Yang Zhang <yang.z.zhang@Intel.com>
> >> 
> >> If posted interrupt is avaliable, then uses it to inject virtual
> >> interrupt to guest.
> >> 
> >> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >> ---
> >>  arch/x86/kvm/irq.c   |    3 ++-
> >>  arch/x86/kvm/lapic.c |   16 +++++++++++++---
> >>  arch/x86/kvm/lapic.h |    1 +
> >>  arch/x86/kvm/vmx.c   |   11 +++++++++++
> >>  arch/x86/kvm/x86.c   |    4 ++++
> >>  5 files changed, 31 insertions(+), 4 deletions(-)
> >> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> >> index 484bc87..5179988 100644
> >> --- a/arch/x86/kvm/irq.c
> >> +++ b/arch/x86/kvm/irq.c
> >> @@ -81,7 +81,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
> >>  	if (kvm_cpu_has_extint(v))
> >>  		return 1;
> >> -	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
> >> +	return (kvm_apic_has_interrupt(v) != -1) ||
> >> +		kvm_hwapic_has_interrupt(v);
> > That's incorrect. kvm_cpu_has_interrupt() should return true only it
> > there is IRR suitable to be injected, not just any IRR.
> > kvm_apic_has_interrupt() should call kvm_apic_update_irr().
> You are right.
> 
> >>  }
> >>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
> >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >> index b3ea50e..46c7310 100644
> >> --- a/arch/x86/kvm/lapic.c
> >> +++ b/arch/x86/kvm/lapic.c
> >> @@ -713,7 +713,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > delivery_mode,
> >>  		} else
> >>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
> >> -		result = !apic_test_and_set_irr(vector, apic);
> >> +		result = 1;
> >> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector))
> >> +			result = !apic_test_and_set_irr(vector, apic);
> >> +
> >>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
> >>  					  trig_mode, vector, !result);
> >>  		if (!result) {
> >> @@ -723,8 +726,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > delivery_mode,
> >>  			break;
> >>  		}
> >> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
> >> -		kvm_vcpu_kick(vcpu);
> >> +		if (!kvm_x86_ops->vm_has_apicv(vcpu->kvm)) {
> >> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> >> +			kvm_vcpu_kick(vcpu);
> >> +		}
> >>  		break;
> > apicv code and non apicv code are completely different. What's the point
> > checking for apicv twice here?
> > Just do:
> > 
> > if (kvm_x86_ops->deliver_posted_interrupt)
> > 	kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)
> > else {
> > 	result = !apic_test_and_set_irr(vector, apic);
> > 	kvm_make_request(KVM_REQ_EVENT, vcpu);
> > 	kvm_vcpu_kick(vcpu);
> > }
> > 
> > And set kvm_x86_ops->deliver_posted_interrupt only if apicv is enabled.
> > 
> > Also rearrange patches so that APIC_TMR handling goes before posted
> > interrupt series.
> Sure. 
> 
> >> 
> >>  	case APIC_DM_REMRD: @@ -1604,6 +1609,11 @@ int
> >>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 	return highest_irr; }
> >> +bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu)
> >> +{
> >> +	return kvm_x86_ops->hwapic_has_interrupt(vcpu);
> >> +}
> >> +
> >>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
> >>  {
> >>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
> >> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >> index e5327be..c6abc63 100644
> >> --- a/arch/x86/kvm/lapic.h
> >> +++ b/arch/x86/kvm/lapic.h
> >> @@ -37,6 +37,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu);
> >>  void kvm_free_lapic(struct kvm_vcpu *vcpu);
> >>  
> >>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +bool
> >>  kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu); int
> >>  kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int
> >>  kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); void
> >>  kvm_lapic_reset(struct kvm_vcpu *vcpu);
> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >> index 0b5a8ae..48a2239 100644
> >> --- a/arch/x86/kvm/vmx.c
> >> +++ b/arch/x86/kvm/vmx.c
> >> @@ -3932,6 +3932,17 @@ static void vmx_posted_intr_clear_on(struct
> > kvm_vcpu *vcpu)
> >>  	clear_bit(POSTED_INTR_ON, (unsigned long *)&vmx->pi_desc.u.control);
> >>  }
> >> +/*
> >> + * Send interrupt to vcpu via posted interrupt way.
> >> + * Return false if posted interrupt is not supported and the caller will
> >> + * roll back to old way(via set vIRR).
> >> + * Return true if posted interrupt is avalialbe, the interrupt is set
> >> + * in pir(posted interrupt requests):
> >> + * 1. If target vcpu is running(non-root mode), send posted interrupt
> >> + * notification to vcpu and hardware will sync pir to vIRR atomically.
> >> + * 2. If target vcpu isn't running(root mode), kick it to pick up the
> >> + * interrupt from pir in next vmentry.
> >> + */
> > The comment should go into previous patch. Also I prefer to not check
> > for posted interrupt inside the callback, but set it to NULL instead.
> > This way we avoid calling a callback on a hot path needlessly.
> It's make sense. So just follow the logic you mentioned above?
>  
Yes.

> >>  static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
> >>  {
> >>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >> index 0baa90d..0981100 100644
> >> --- a/arch/x86/kvm/x86.c
> >> +++ b/arch/x86/kvm/x86.c
> >> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
> >>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 				   
> >>  struct kvm_lapic_state *s) { +	kvm_x86_ops->sync_pir_to_irr(vcpu);
> >>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
> >>  
> >>  	return 0; @@ -5699,6 +5700,7 @@ static int vcpu_enter_guest(struct
> >>  kvm_vcpu *vcpu) 	}
> >>  
> >>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> >>  +		kvm_x86_ops->sync_pir_to_irr(vcpu); 		inject_pending_event(vcpu);
> >>  
> >>  		/* enable NMI/IRQ window open exits if needed */
> >> @@ -5741,6 +5743,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >> 
> >>  	local_irq_disable();
> >> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> >> +
> > Why is this separate from pir_to_irr syncing?
> This is the result of discussion with Marcelo.
> It is more reasonable to put it here to avoid unnecessary posted interrupt between:
> 
> vcpu->mode = IN_GUEST_MODE;
> 
> <--interrupt may arrived here and this is unnecessary.
> 
> local_irq_disable();
> 

But this still can happen as far as I see:

vcpu0                                         vcpu1: 
pi_test_and_set_pir()
kvm_make_request(KVM_REQ_EVENT)
                                            if (KVM_REQ_EVENT)
                                                   sync_pir_to_irr()
                                            vcpu->mode = IN_GUEST_MODE;
if (vcpu->mode == IN_GUEST_MODE)
  if (!pi_test_and_set_on())
    apic->send_IPI_mask()
                                            --> IPI arrives here
                                            local_irq_disable();
                                            posted_intr_clear_on()


May be move vcpu->mode = IN_GUEST_MODE after local_irq_disable()?

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 19, 2013, 12:42 p.m. UTC | #4
Gleb Natapov wrote on 2013-03-19:
> On Tue, Mar 19, 2013 at 12:11:47PM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-03-19:
>>> On Fri, Mar 15, 2013 at 09:31:11PM +0800, Yang Zhang wrote:
>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>> 
>>>> If posted interrupt is avaliable, then uses it to inject virtual
>>>> interrupt to guest.
>>>> 
>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>> ---
>>>>  arch/x86/kvm/irq.c   |    3 ++-
>>>>  arch/x86/kvm/lapic.c |   16 +++++++++++++---
>>>>  arch/x86/kvm/lapic.h |    1 +
>>>>  arch/x86/kvm/vmx.c   |   11 +++++++++++
>>>>  arch/x86/kvm/x86.c   |    4 ++++
>>>>  5 files changed, 31 insertions(+), 4 deletions(-)
>>>> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
>>>> index 484bc87..5179988 100644
>>>> --- a/arch/x86/kvm/irq.c
>>>> +++ b/arch/x86/kvm/irq.c
>>>> @@ -81,7 +81,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>>>>  	if (kvm_cpu_has_extint(v))
>>>>  		return 1;
>>>> -	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
>>>> +	return (kvm_apic_has_interrupt(v) != -1) ||
>>>> +		kvm_hwapic_has_interrupt(v);
>>> That's incorrect. kvm_cpu_has_interrupt() should return true only it
>>> there is IRR suitable to be injected, not just any IRR.
>>> kvm_apic_has_interrupt() should call kvm_apic_update_irr().
>> You are right.
>> 
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>> index b3ea50e..46c7310 100644
>>>> --- a/arch/x86/kvm/lapic.c
>>>> +++ b/arch/x86/kvm/lapic.c
>>>> @@ -713,7 +713,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic,
> int
>>> delivery_mode,
>>>>  		} else
>>>>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
>>>> -		result = !apic_test_and_set_irr(vector, apic);
>>>> +		result = 1;
>>>> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector))
>>>> +			result = !apic_test_and_set_irr(vector, apic);
>>>> +
>>>>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>>>>  					  trig_mode, vector, !result);
>>>>  		if (!result) {
>>>> @@ -723,8 +726,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic,
> int
>>> delivery_mode,
>>>>  			break;
>>>>  		}
>>>> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
>>>> -		kvm_vcpu_kick(vcpu);
>>>> +		if (!kvm_x86_ops->vm_has_apicv(vcpu->kvm)) {
>>>> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
>>>> +			kvm_vcpu_kick(vcpu);
>>>> +		}
>>>>  		break;
>>> apicv code and non apicv code are completely different. What's the point
>>> checking for apicv twice here?
>>> Just do:
>>> 
>>> if (kvm_x86_ops->deliver_posted_interrupt)
>>> 	kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)
>>> else {
>>> 	result = !apic_test_and_set_irr(vector, apic);
>>> 	kvm_make_request(KVM_REQ_EVENT, vcpu);
>>> 	kvm_vcpu_kick(vcpu);
>>> }
>>> 
>>> And set kvm_x86_ops->deliver_posted_interrupt only if apicv is enabled.
>>> 
>>> Also rearrange patches so that APIC_TMR handling goes before posted
>>> interrupt series.
>> Sure.
>> 
>>>> 
>>>>  	case APIC_DM_REMRD: @@ -1604,6 +1609,11 @@ int
>>>>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 	return highest_irr; }
>>>> +bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	return kvm_x86_ops->hwapic_has_interrupt(vcpu);
>>>> +}
>>>> +
>>>>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
>>>>  {
>>>>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>> index e5327be..c6abc63 100644
>>>> --- a/arch/x86/kvm/lapic.h
>>>> +++ b/arch/x86/kvm/lapic.h
>>>> @@ -37,6 +37,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu);
>>>>  void kvm_free_lapic(struct kvm_vcpu *vcpu);
>>>>  
>>>>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +bool
>>>>  kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu); int
>>>>  kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int
>>>>  kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); void
>>>>  kvm_lapic_reset(struct kvm_vcpu *vcpu);
>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>> index 0b5a8ae..48a2239 100644
>>>> --- a/arch/x86/kvm/vmx.c
>>>> +++ b/arch/x86/kvm/vmx.c
>>>> @@ -3932,6 +3932,17 @@ static void vmx_posted_intr_clear_on(struct
>>> kvm_vcpu *vcpu)
>>>>  	clear_bit(POSTED_INTR_ON, (unsigned long
>>>>  *)&vmx->pi_desc.u.control); }
>>>> +/*
>>>> + * Send interrupt to vcpu via posted interrupt way.
>>>> + * Return false if posted interrupt is not supported and the caller will
>>>> + * roll back to old way(via set vIRR).
>>>> + * Return true if posted interrupt is avalialbe, the interrupt is set
>>>> + * in pir(posted interrupt requests):
>>>> + * 1. If target vcpu is running(non-root mode), send posted interrupt
>>>> + * notification to vcpu and hardware will sync pir to vIRR atomically.
>>>> + * 2. If target vcpu isn't running(root mode), kick it to pick up the
>>>> + * interrupt from pir in next vmentry.
>>>> + */
>>> The comment should go into previous patch. Also I prefer to not check
>>> for posted interrupt inside the callback, but set it to NULL instead.
>>> This way we avoid calling a callback on a hot path needlessly.
>> It's make sense. So just follow the logic you mentioned above?
>> 
> Yes.
> 
>>>>  static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int
>>>>  vector) { 	struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 0baa90d..0981100 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>>>>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
>>>>  
>>>>  struct kvm_lapic_state *s) { +	kvm_x86_ops->sync_pir_to_irr(vcpu);
>>>>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>>>>  
>>>>  	return 0; @@ -5699,6 +5700,7 @@ static int vcpu_enter_guest(struct
>>>>  kvm_vcpu *vcpu) 	}
>>>>  
>>>>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>>>>  +		kvm_x86_ops->sync_pir_to_irr(vcpu);
> 	inject_pending_event(vcpu);
>>>> 
>>>>  		/* enable NMI/IRQ window open exits if needed */
>>>> @@ -5741,6 +5743,8 @@ static int vcpu_enter_guest(struct kvm_vcpu
>>>> *vcpu)
>>>> 
>>>>  	local_irq_disable();
>>>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
>>>> +
>>> Why is this separate from pir_to_irr syncing?
>> This is the result of discussion with Marcelo. It is more reasonable to
>> put it here to avoid unnecessary posted interrupt between:
>> 
>> vcpu->mode = IN_GUEST_MODE;
>> 
>> <--interrupt may arrived here and this is unnecessary.
>> 
>> local_irq_disable();
>> 
> 
> But this still can happen as far as I see:
> 
> vcpu0                                         vcpu1:
> pi_test_and_set_pir() kvm_make_request(KVM_REQ_EVENT)
>                                             if (KVM_REQ_EVENT)
>                                                    sync_pir_to_irr()
>                                             vcpu->mode =
> IN_GUEST_MODE;
> if (vcpu->mode == IN_GUEST_MODE)
>   if (!pi_test_and_set_on())
>     apic->send_IPI_mask()
>                                             --> IPI arrives here
>                                             local_irq_disable();
>                                             posted_intr_clear_on()
Current solution is trying to block other Posted Interrupt from other VCPUs at same time. It only mitigates it but cannot solve it. The case you mentioned still exists but it should be rare.

> May be move vcpu->mode = IN_GUEST_MODE after local_irq_disable()?
Yes, this will solve it. But I am not sure whether it will introduce any regressions. Is there any check relies on this sequence?
 	
> --
> 			Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 19, 2013, 1:29 p.m. UTC | #5
On Tue, Mar 19, 2013 at 12:42:01PM +0000, Zhang, Yang Z wrote:
> >>>>  	local_irq_disable();
> >>>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> >>>> +
> >>> Why is this separate from pir_to_irr syncing?
> >> This is the result of discussion with Marcelo. It is more reasonable to
> >> put it here to avoid unnecessary posted interrupt between:
> >> 
> >> vcpu->mode = IN_GUEST_MODE;
> >> 
> >> <--interrupt may arrived here and this is unnecessary.
> >> 
> >> local_irq_disable();
> >> 
> > 
> > But this still can happen as far as I see:
> > 
> > vcpu0                                         vcpu1:
> > pi_test_and_set_pir() kvm_make_request(KVM_REQ_EVENT)
> >                                             if (KVM_REQ_EVENT)
> >                                                    sync_pir_to_irr()
> >                                             vcpu->mode =
> > IN_GUEST_MODE;
> > if (vcpu->mode == IN_GUEST_MODE)
> >   if (!pi_test_and_set_on())
> >     apic->send_IPI_mask()
> >                                             --> IPI arrives here
> >                                             local_irq_disable();
> >                                             posted_intr_clear_on()
> Current solution is trying to block other Posted Interrupt from other VCPUs at same time. It only mitigates it but cannot solve it. The case you mentioned still exists but it should be rare.
> 
I am not sure I follow. What scenario exactly are you talking about. I
looked over past discussion about it and saw that Marcelo gives an
example how IPI can be lost, but I think that's because we set "on" bit
after KVM_REQ_EVENT:

cpu0                                    cpu1            vcpu0
test_and_set_bit(PIR-A)
set KVM_REQ_EVENT
                                                        process REQ_EVENT
                                                        PIR-A->IRR

                                                        vcpu->mode=IN_GUEST

if (vcpu0->guest_mode)
        if (!t_a_s_bit(PIR notif))
                send IPI
                                                        linux_pir_handler

                                        t_a_s_b(PIR-B)=1
                                        no PIR IPI sent


But what if on delivery we do:
pi_test_and_set_pir()
r = pi_test_and_set_on()
kvm_make_request(KVM_REQ_EVENT)
if (!r)
   send_IPI_mask()
else
   kvm_vcpu_kick()

And on vcpu entry we do:
if (kvm_check_request(KVM_REQ_EVENT)
 if (test_and_clear_bit(on))
   kvm_apic_update_irr()

What are the downsides? Can we lost interrupts this way?

> > May be move vcpu->mode = IN_GUEST_MODE after local_irq_disable()?
> Yes, this will solve it. But I am not sure whether it will introduce any regressions. Is there any check relies on this sequence?
>  	
Do not think so.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 19, 2013, 1:59 p.m. UTC | #6
Gleb Natapov wrote on 2013-03-19:
> On Tue, Mar 19, 2013 at 12:42:01PM +0000, Zhang, Yang Z wrote:
>>>>>>  	local_irq_disable();
>>>>>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
>>>>>> +
>>>>> Why is this separate from pir_to_irr syncing?
>>>> This is the result of discussion with Marcelo. It is more reasonable to
>>>> put it here to avoid unnecessary posted interrupt between:
>>>> 
>>>> vcpu->mode = IN_GUEST_MODE;
>>>> 
>>>> <--interrupt may arrived here and this is unnecessary.
>>>> 
>>>> local_irq_disable();
>>>> 
>>> 
>>> But this still can happen as far as I see:
>>> 
>>> vcpu0                                         vcpu1:
>>> pi_test_and_set_pir() kvm_make_request(KVM_REQ_EVENT)
>>>                                             if (KVM_REQ_EVENT)
>>>                                                    sync_pir_to_irr()
>>>                                             vcpu->mode =
>>> IN_GUEST_MODE;
>>> if (vcpu->mode == IN_GUEST_MODE)
>>>   if (!pi_test_and_set_on())
>>>     apic->send_IPI_mask()
>>>                                             --> IPI arrives here
>>>                                             local_irq_disable();
>>>                                             posted_intr_clear_on()
>> Current solution is trying to block other Posted Interrupt from other VCPUs at
> same time. It only mitigates it but cannot solve it. The case you mentioned still
> exists but it should be rare.
>> 
> I am not sure I follow. What scenario exactly are you talking about. I
> looked over past discussion about it and saw that Marcelo gives an
> example how IPI can be lost, but I think that's because we set "on" bit
> after KVM_REQ_EVENT:
The IPI will not lost in his example(he misread the patch). 

> cpu0                                    cpu1            vcpu0
> test_and_set_bit(PIR-A) set KVM_REQ_EVENT
>                                                         process
>                                                         REQ_EVENT
>                                                         PIR-A->IRR
> 
> vcpu->mode=IN_GUEST
> 
> if (vcpu0->guest_mode)
>         if (!t_a_s_bit(PIR notif))
>                 send IPI
> linux_pir_handler
> 
>                                         t_a_s_b(PIR-B)=1
>                                         no PIR IPI sent
> 
> But what if on delivery we do:
> pi_test_and_set_pir()
> r = pi_test_and_set_on()
> kvm_make_request(KVM_REQ_EVENT)
> if (!r)
>    send_IPI_mask() else kvm_vcpu_kick()
> And on vcpu entry we do:
> if (kvm_check_request(KVM_REQ_EVENT)
>  if (test_and_clear_bit(on))
>    kvm_apic_update_irr()
> What are the downsides? Can we lost interrupts this way?
Need to check guest mode before sending IPI. Otherwise hypervisor may receive IPI.
I think current logic is ok. Only problem is that when to clear Outstanding Notification bit. Actually I prefer your suggestion to clear it before sync_pir_irr. But Marcelo prefer to clear ON bit after disabling irq.
 
>>> May be move vcpu->mode = IN_GUEST_MODE after local_irq_disable()?
>> Yes, this will solve it. But I am not sure whether it will introduce
>> any regressions. Is there any check relies on this sequence?
>> 
> Do not think so.
> 
> --
> 			Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 19, 2013, 2:51 p.m. UTC | #7
On Tue, Mar 19, 2013 at 01:59:21PM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-03-19:
> > On Tue, Mar 19, 2013 at 12:42:01PM +0000, Zhang, Yang Z wrote:
> >>>>>>  	local_irq_disable();
> >>>>>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> >>>>>> +
> >>>>> Why is this separate from pir_to_irr syncing?
> >>>> This is the result of discussion with Marcelo. It is more reasonable to
> >>>> put it here to avoid unnecessary posted interrupt between:
> >>>> 
> >>>> vcpu->mode = IN_GUEST_MODE;
> >>>> 
> >>>> <--interrupt may arrived here and this is unnecessary.
> >>>> 
> >>>> local_irq_disable();
> >>>> 
> >>> 
> >>> But this still can happen as far as I see:
> >>> 
> >>> vcpu0                                         vcpu1:
> >>> pi_test_and_set_pir() kvm_make_request(KVM_REQ_EVENT)
> >>>                                             if (KVM_REQ_EVENT)
> >>>                                                    sync_pir_to_irr()
> >>>                                             vcpu->mode =
> >>> IN_GUEST_MODE;
> >>> if (vcpu->mode == IN_GUEST_MODE)
> >>>   if (!pi_test_and_set_on())
> >>>     apic->send_IPI_mask()
> >>>                                             --> IPI arrives here
> >>>                                             local_irq_disable();
> >>>                                             posted_intr_clear_on()
> >> Current solution is trying to block other Posted Interrupt from other VCPUs at
> > same time. It only mitigates it but cannot solve it. The case you mentioned still
> > exists but it should be rare.
> >> 
> > I am not sure I follow. What scenario exactly are you talking about. I
> > looked over past discussion about it and saw that Marcelo gives an
> > example how IPI can be lost, but I think that's because we set "on" bit
> > after KVM_REQ_EVENT:
> The IPI will not lost in his example(he misread the patch). 
> 
> > cpu0                                    cpu1            vcpu0
> > test_and_set_bit(PIR-A) set KVM_REQ_EVENT
> >                                                         process
> >                                                         REQ_EVENT
> >                                                         PIR-A->IRR
> > 
> > vcpu->mode=IN_GUEST
> > 
> > if (vcpu0->guest_mode)
> >         if (!t_a_s_bit(PIR notif))
> >                 send IPI
> > linux_pir_handler
> > 
> >                                         t_a_s_b(PIR-B)=1
> >                                         no PIR IPI sent
> > 
> > But what if on delivery we do:
> > pi_test_and_set_pir()
> > r = pi_test_and_set_on()
> > kvm_make_request(KVM_REQ_EVENT)
> > if (!r)
> >    send_IPI_mask() else kvm_vcpu_kick()
> > And on vcpu entry we do:
> > if (kvm_check_request(KVM_REQ_EVENT)
> >  if (test_and_clear_bit(on))
> >    kvm_apic_update_irr()
> > What are the downsides? Can we lost interrupts this way?
> Need to check guest mode before sending IPI. Otherwise hypervisor may receive IPI.
Of course, forget it. So if (!r) should be if (!r && mode == IN_GUEST)

> I think current logic is ok. Only problem is that when to clear Outstanding Notification bit. Actually I prefer your suggestion to clear it before sync_pir_irr. But Marcelo prefer to clear ON bit after disabling irq.
Marcelo what is the problem with the logic above?

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti March 19, 2013, 3:03 p.m. UTC | #8
On Tue, Mar 19, 2013 at 02:23:59PM +0200, Gleb Natapov wrote:
> On Tue, Mar 19, 2013 at 12:11:47PM +0000, Zhang, Yang Z wrote:
> > Gleb Natapov wrote on 2013-03-19:
> > > On Fri, Mar 15, 2013 at 09:31:11PM +0800, Yang Zhang wrote:
> > >> From: Yang Zhang <yang.z.zhang@Intel.com>
> > >> 
> > >> If posted interrupt is avaliable, then uses it to inject virtual
> > >> interrupt to guest.
> > >> 
> > >> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> > >> ---
> > >>  arch/x86/kvm/irq.c   |    3 ++-
> > >>  arch/x86/kvm/lapic.c |   16 +++++++++++++---
> > >>  arch/x86/kvm/lapic.h |    1 +
> > >>  arch/x86/kvm/vmx.c   |   11 +++++++++++
> > >>  arch/x86/kvm/x86.c   |    4 ++++
> > >>  5 files changed, 31 insertions(+), 4 deletions(-)
> > >> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> > >> index 484bc87..5179988 100644
> > >> --- a/arch/x86/kvm/irq.c
> > >> +++ b/arch/x86/kvm/irq.c
> > >> @@ -81,7 +81,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
> > >>  	if (kvm_cpu_has_extint(v))
> > >>  		return 1;
> > >> -	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
> > >> +	return (kvm_apic_has_interrupt(v) != -1) ||
> > >> +		kvm_hwapic_has_interrupt(v);
> > > That's incorrect. kvm_cpu_has_interrupt() should return true only it
> > > there is IRR suitable to be injected, not just any IRR.
> > > kvm_apic_has_interrupt() should call kvm_apic_update_irr().
> > You are right.
> > 
> > >>  }
> > >>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
> > >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > >> index b3ea50e..46c7310 100644
> > >> --- a/arch/x86/kvm/lapic.c
> > >> +++ b/arch/x86/kvm/lapic.c
> > >> @@ -713,7 +713,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > > delivery_mode,
> > >>  		} else
> > >>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
> > >> -		result = !apic_test_and_set_irr(vector, apic);
> > >> +		result = 1;
> > >> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector))
> > >> +			result = !apic_test_and_set_irr(vector, apic);
> > >> +
> > >>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
> > >>  					  trig_mode, vector, !result);
> > >>  		if (!result) {
> > >> @@ -723,8 +726,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > > delivery_mode,
> > >>  			break;
> > >>  		}
> > >> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
> > >> -		kvm_vcpu_kick(vcpu);
> > >> +		if (!kvm_x86_ops->vm_has_apicv(vcpu->kvm)) {
> > >> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> > >> +			kvm_vcpu_kick(vcpu);
> > >> +		}
> > >>  		break;
> > > apicv code and non apicv code are completely different. What's the point
> > > checking for apicv twice here?
> > > Just do:
> > > 
> > > if (kvm_x86_ops->deliver_posted_interrupt)
> > > 	kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)
> > > else {
> > > 	result = !apic_test_and_set_irr(vector, apic);
> > > 	kvm_make_request(KVM_REQ_EVENT, vcpu);
> > > 	kvm_vcpu_kick(vcpu);
> > > }
> > > 
> > > And set kvm_x86_ops->deliver_posted_interrupt only if apicv is enabled.
> > > 
> > > Also rearrange patches so that APIC_TMR handling goes before posted
> > > interrupt series.
> > Sure. 
> > 
> > >> 
> > >>  	case APIC_DM_REMRD: @@ -1604,6 +1609,11 @@ int
> > >>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 	return highest_irr; }
> > >> +bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu)
> > >> +{
> > >> +	return kvm_x86_ops->hwapic_has_interrupt(vcpu);
> > >> +}
> > >> +
> > >>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
> > >>  {
> > >>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
> > >> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > >> index e5327be..c6abc63 100644
> > >> --- a/arch/x86/kvm/lapic.h
> > >> +++ b/arch/x86/kvm/lapic.h
> > >> @@ -37,6 +37,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu);
> > >>  void kvm_free_lapic(struct kvm_vcpu *vcpu);
> > >>  
> > >>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +bool
> > >>  kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu); int
> > >>  kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int
> > >>  kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); void
> > >>  kvm_lapic_reset(struct kvm_vcpu *vcpu);
> > >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > >> index 0b5a8ae..48a2239 100644
> > >> --- a/arch/x86/kvm/vmx.c
> > >> +++ b/arch/x86/kvm/vmx.c
> > >> @@ -3932,6 +3932,17 @@ static void vmx_posted_intr_clear_on(struct
> > > kvm_vcpu *vcpu)
> > >>  	clear_bit(POSTED_INTR_ON, (unsigned long *)&vmx->pi_desc.u.control);
> > >>  }
> > >> +/*
> > >> + * Send interrupt to vcpu via posted interrupt way.
> > >> + * Return false if posted interrupt is not supported and the caller will
> > >> + * roll back to old way(via set vIRR).
> > >> + * Return true if posted interrupt is avalialbe, the interrupt is set
> > >> + * in pir(posted interrupt requests):
> > >> + * 1. If target vcpu is running(non-root mode), send posted interrupt
> > >> + * notification to vcpu and hardware will sync pir to vIRR atomically.
> > >> + * 2. If target vcpu isn't running(root mode), kick it to pick up the
> > >> + * interrupt from pir in next vmentry.
> > >> + */
> > > The comment should go into previous patch. Also I prefer to not check
> > > for posted interrupt inside the callback, but set it to NULL instead.
> > > This way we avoid calling a callback on a hot path needlessly.
> > It's make sense. So just follow the logic you mentioned above?
> >  
> Yes.
> 
> > >>  static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
> > >>  {
> > >>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > >> index 0baa90d..0981100 100644
> > >> --- a/arch/x86/kvm/x86.c
> > >> +++ b/arch/x86/kvm/x86.c
> > >> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
> > >>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 				   
> > >>  struct kvm_lapic_state *s) { +	kvm_x86_ops->sync_pir_to_irr(vcpu);
> > >>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
> > >>  
> > >>  	return 0; @@ -5699,6 +5700,7 @@ static int vcpu_enter_guest(struct
> > >>  kvm_vcpu *vcpu) 	}
> > >>  
> > >>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> > >>  +		kvm_x86_ops->sync_pir_to_irr(vcpu); 		inject_pending_event(vcpu);
> > >>  
> > >>  		/* enable NMI/IRQ window open exits if needed */
> > >> @@ -5741,6 +5743,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > >> 
> > >>  	local_irq_disable();
> > >> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> > >> +
> > > Why is this separate from pir_to_irr syncing?
> > This is the result of discussion with Marcelo.
> > It is more reasonable to put it here to avoid unnecessary posted interrupt between:
> > 
> > vcpu->mode = IN_GUEST_MODE;
> > 
> > <--interrupt may arrived here and this is unnecessary.
> > 
> > local_irq_disable();
> > 
> 
> But this still can happen as far as I see:
> 
> vcpu0                                         vcpu1: 
> pi_test_and_set_pir()
> kvm_make_request(KVM_REQ_EVENT)
>                                             if (KVM_REQ_EVENT)
>                                                    sync_pir_to_irr()
						^^^^^^^^^^^^^^^^^^
>                                             vcpu->mode = IN_GUEST_MODE;
> if (vcpu->mode == IN_GUEST_MODE)
>   if (!pi_test_and_set_on())
>     apic->send_IPI_mask()
>                                             --> IPI arrives here
>                                             local_irq_disable();
>                                             posted_intr_clear_on()
> 
> 
> May be move vcpu->mode = IN_GUEST_MODE after local_irq_disable()?

Scenario is correct because injected PIR has been synced to IRR before
guest entry.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 19, 2013, 3:12 p.m. UTC | #9
On Tue, Mar 19, 2013 at 04:51:04PM +0200, Gleb Natapov wrote:
> On Tue, Mar 19, 2013 at 01:59:21PM +0000, Zhang, Yang Z wrote:
> > Gleb Natapov wrote on 2013-03-19:
> > > On Tue, Mar 19, 2013 at 12:42:01PM +0000, Zhang, Yang Z wrote:
> > >>>>>>  	local_irq_disable();
> > >>>>>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> > >>>>>> +
> > >>>>> Why is this separate from pir_to_irr syncing?
> > >>>> This is the result of discussion with Marcelo. It is more reasonable to
> > >>>> put it here to avoid unnecessary posted interrupt between:
> > >>>> 
> > >>>> vcpu->mode = IN_GUEST_MODE;
> > >>>> 
> > >>>> <--interrupt may arrived here and this is unnecessary.
> > >>>> 
> > >>>> local_irq_disable();
> > >>>> 
> > >>> 
> > >>> But this still can happen as far as I see:
> > >>> 
> > >>> vcpu0                                         vcpu1:
> > >>> pi_test_and_set_pir() kvm_make_request(KVM_REQ_EVENT)
> > >>>                                             if (KVM_REQ_EVENT)
> > >>>                                                    sync_pir_to_irr()
> > >>>                                             vcpu->mode =
> > >>> IN_GUEST_MODE;
> > >>> if (vcpu->mode == IN_GUEST_MODE)
> > >>>   if (!pi_test_and_set_on())
> > >>>     apic->send_IPI_mask()
> > >>>                                             --> IPI arrives here
> > >>>                                             local_irq_disable();
> > >>>                                             posted_intr_clear_on()
> > >> Current solution is trying to block other Posted Interrupt from other VCPUs at
> > > same time. It only mitigates it but cannot solve it. The case you mentioned still
> > > exists but it should be rare.
> > >> 
> > > I am not sure I follow. What scenario exactly are you talking about. I
> > > looked over past discussion about it and saw that Marcelo gives an
> > > example how IPI can be lost, but I think that's because we set "on" bit
> > > after KVM_REQ_EVENT:
> > The IPI will not lost in his example(he misread the patch). 
> > 
> > > cpu0                                    cpu1            vcpu0
> > > test_and_set_bit(PIR-A) set KVM_REQ_EVENT
> > >                                                         process
> > >                                                         REQ_EVENT
> > >                                                         PIR-A->IRR
> > > 
> > > vcpu->mode=IN_GUEST
> > > 
> > > if (vcpu0->guest_mode)
> > >         if (!t_a_s_bit(PIR notif))
> > >                 send IPI
> > > linux_pir_handler
> > > 
> > >                                         t_a_s_b(PIR-B)=1
> > >                                         no PIR IPI sent
> > > 
> > > But what if on delivery we do:
> > > pi_test_and_set_pir()
> > > r = pi_test_and_set_on()
> > > kvm_make_request(KVM_REQ_EVENT)
> > > if (!r)
> > >    send_IPI_mask() else kvm_vcpu_kick()
> > > And on vcpu entry we do:
> > > if (kvm_check_request(KVM_REQ_EVENT)
> > >  if (test_and_clear_bit(on))
> > >    kvm_apic_update_irr()
> > > What are the downsides? Can we lost interrupts this way?
> > Need to check guest mode before sending IPI. Otherwise hypervisor may receive IPI.
> Of course, forget it. So if (!r) should be if (!r && mode == IN_GUEST)
> 
> > I think current logic is ok. Only problem is that when to clear Outstanding Notification bit. Actually I prefer your suggestion to clear it before sync_pir_irr. But Marcelo prefer to clear ON bit after disabling irq.
> Marcelo what is the problem with the logic above?
> 
Just to clarify the advantages that I see are: one less callback, no
need to sync pir to irr on each event and, arguably, a little bit
simpler logic.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti March 19, 2013, 3:13 p.m. UTC | #10
On Tue, Mar 19, 2013 at 03:29:24PM +0200, Gleb Natapov wrote:
> On Tue, Mar 19, 2013 at 12:42:01PM +0000, Zhang, Yang Z wrote:
> > >>>>  	local_irq_disable();
> > >>>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> > >>>> +
> > >>> Why is this separate from pir_to_irr syncing?
> > >> This is the result of discussion with Marcelo. It is more reasonable to
> > >> put it here to avoid unnecessary posted interrupt between:
> > >> 
> > >> vcpu->mode = IN_GUEST_MODE;
> > >> 
> > >> <--interrupt may arrived here and this is unnecessary.
> > >> 
> > >> local_irq_disable();
> > >> 
> > > 
> > > But this still can happen as far as I see:
> > > 
> > > vcpu0                                         vcpu1:
> > > pi_test_and_set_pir() kvm_make_request(KVM_REQ_EVENT)
> > >                                             if (KVM_REQ_EVENT)
> > >                                                    sync_pir_to_irr()
> > >                                             vcpu->mode =
> > > IN_GUEST_MODE;
> > > if (vcpu->mode == IN_GUEST_MODE)
> > >   if (!pi_test_and_set_on())
> > >     apic->send_IPI_mask()
> > >                                             --> IPI arrives here
> > >                                             local_irq_disable();
> > >                                             posted_intr_clear_on()
> > Current solution is trying to block other Posted Interrupt from other VCPUs at same time. It only mitigates it but cannot solve it. The case you mentioned still exists but it should be rare.
> > 
> I am not sure I follow. What scenario exactly are you talking about. I
> looked over past discussion about it and saw that Marcelo gives an
> example how IPI can be lost, but I think that's because we set "on" bit
> after KVM_REQ_EVENT:
> 
> cpu0                                    cpu1            vcpu0
> test_and_set_bit(PIR-A)
> set KVM_REQ_EVENT
>                                                         process REQ_EVENT
>                                                         PIR-A->IRR
> 
>                                                         vcpu->mode=IN_GUEST
> 
> if (vcpu0->guest_mode)
>         if (!t_a_s_bit(PIR notif))
>                 send IPI
>                                                         linux_pir_handler
> 
>                                         t_a_s_b(PIR-B)=1
>                                         no PIR IPI sent
> 
> 
> But what if on delivery we do:
> pi_test_and_set_pir()
> r = pi_test_and_set_on()
> kvm_make_request(KVM_REQ_EVENT)
> if (!r)
>    send_IPI_mask()
> else
>    kvm_vcpu_kick()
> 
> And on vcpu entry we do:
> if (kvm_check_request(KVM_REQ_EVENT)
>  if (test_and_clear_bit(on))
>    kvm_apic_update_irr()
> 
> What are the downsides? Can we lost interrupts this way?

You should not ever enter guest mode with ON bit set on PIR (because that will
prevent PIR IPI from waking up interrupt injection logic). This is why
the ON bit should be cleared on VM entry.

> > > May be move vcpu->mode = IN_GUEST_MODE after local_irq_disable()?
> > Yes, this will solve it. But I am not sure whether it will introduce any regressions. Is there any check relies on this sequence?
> >  	
> Do not think so.

Can't see what is the problem with the code as it is?


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 19, 2013, 3:18 p.m. UTC | #11
On Tue, Mar 19, 2013 at 12:03:22PM -0300, Marcelo Tosatti wrote:
> On Tue, Mar 19, 2013 at 02:23:59PM +0200, Gleb Natapov wrote:
> > On Tue, Mar 19, 2013 at 12:11:47PM +0000, Zhang, Yang Z wrote:
> > > Gleb Natapov wrote on 2013-03-19:
> > > > On Fri, Mar 15, 2013 at 09:31:11PM +0800, Yang Zhang wrote:
> > > >> From: Yang Zhang <yang.z.zhang@Intel.com>
> > > >> 
> > > >> If posted interrupt is avaliable, then uses it to inject virtual
> > > >> interrupt to guest.
> > > >> 
> > > >> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> > > >> ---
> > > >>  arch/x86/kvm/irq.c   |    3 ++-
> > > >>  arch/x86/kvm/lapic.c |   16 +++++++++++++---
> > > >>  arch/x86/kvm/lapic.h |    1 +
> > > >>  arch/x86/kvm/vmx.c   |   11 +++++++++++
> > > >>  arch/x86/kvm/x86.c   |    4 ++++
> > > >>  5 files changed, 31 insertions(+), 4 deletions(-)
> > > >> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> > > >> index 484bc87..5179988 100644
> > > >> --- a/arch/x86/kvm/irq.c
> > > >> +++ b/arch/x86/kvm/irq.c
> > > >> @@ -81,7 +81,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
> > > >>  	if (kvm_cpu_has_extint(v))
> > > >>  		return 1;
> > > >> -	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
> > > >> +	return (kvm_apic_has_interrupt(v) != -1) ||
> > > >> +		kvm_hwapic_has_interrupt(v);
> > > > That's incorrect. kvm_cpu_has_interrupt() should return true only it
> > > > there is IRR suitable to be injected, not just any IRR.
> > > > kvm_apic_has_interrupt() should call kvm_apic_update_irr().
> > > You are right.
> > > 
> > > >>  }
> > > >>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
> > > >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > > >> index b3ea50e..46c7310 100644
> > > >> --- a/arch/x86/kvm/lapic.c
> > > >> +++ b/arch/x86/kvm/lapic.c
> > > >> @@ -713,7 +713,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > > > delivery_mode,
> > > >>  		} else
> > > >>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
> > > >> -		result = !apic_test_and_set_irr(vector, apic);
> > > >> +		result = 1;
> > > >> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector))
> > > >> +			result = !apic_test_and_set_irr(vector, apic);
> > > >> +
> > > >>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
> > > >>  					  trig_mode, vector, !result);
> > > >>  		if (!result) {
> > > >> @@ -723,8 +726,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > > > delivery_mode,
> > > >>  			break;
> > > >>  		}
> > > >> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
> > > >> -		kvm_vcpu_kick(vcpu);
> > > >> +		if (!kvm_x86_ops->vm_has_apicv(vcpu->kvm)) {
> > > >> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> > > >> +			kvm_vcpu_kick(vcpu);
> > > >> +		}
> > > >>  		break;
> > > > apicv code and non apicv code are completely different. What's the point
> > > > checking for apicv twice here?
> > > > Just do:
> > > > 
> > > > if (kvm_x86_ops->deliver_posted_interrupt)
> > > > 	kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)
> > > > else {
> > > > 	result = !apic_test_and_set_irr(vector, apic);
> > > > 	kvm_make_request(KVM_REQ_EVENT, vcpu);
> > > > 	kvm_vcpu_kick(vcpu);
> > > > }
> > > > 
> > > > And set kvm_x86_ops->deliver_posted_interrupt only if apicv is enabled.
> > > > 
> > > > Also rearrange patches so that APIC_TMR handling goes before posted
> > > > interrupt series.
> > > Sure. 
> > > 
> > > >> 
> > > >>  	case APIC_DM_REMRD: @@ -1604,6 +1609,11 @@ int
> > > >>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 	return highest_irr; }
> > > >> +bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu)
> > > >> +{
> > > >> +	return kvm_x86_ops->hwapic_has_interrupt(vcpu);
> > > >> +}
> > > >> +
> > > >>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
> > > >>  {
> > > >>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
> > > >> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > > >> index e5327be..c6abc63 100644
> > > >> --- a/arch/x86/kvm/lapic.h
> > > >> +++ b/arch/x86/kvm/lapic.h
> > > >> @@ -37,6 +37,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu);
> > > >>  void kvm_free_lapic(struct kvm_vcpu *vcpu);
> > > >>  
> > > >>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +bool
> > > >>  kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu); int
> > > >>  kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int
> > > >>  kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); void
> > > >>  kvm_lapic_reset(struct kvm_vcpu *vcpu);
> > > >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > > >> index 0b5a8ae..48a2239 100644
> > > >> --- a/arch/x86/kvm/vmx.c
> > > >> +++ b/arch/x86/kvm/vmx.c
> > > >> @@ -3932,6 +3932,17 @@ static void vmx_posted_intr_clear_on(struct
> > > > kvm_vcpu *vcpu)
> > > >>  	clear_bit(POSTED_INTR_ON, (unsigned long *)&vmx->pi_desc.u.control);
> > > >>  }
> > > >> +/*
> > > >> + * Send interrupt to vcpu via posted interrupt way.
> > > >> + * Return false if posted interrupt is not supported and the caller will
> > > >> + * roll back to old way(via set vIRR).
> > > >> + * Return true if posted interrupt is avalialbe, the interrupt is set
> > > >> + * in pir(posted interrupt requests):
> > > >> + * 1. If target vcpu is running(non-root mode), send posted interrupt
> > > >> + * notification to vcpu and hardware will sync pir to vIRR atomically.
> > > >> + * 2. If target vcpu isn't running(root mode), kick it to pick up the
> > > >> + * interrupt from pir in next vmentry.
> > > >> + */
> > > > The comment should go into previous patch. Also I prefer to not check
> > > > for posted interrupt inside the callback, but set it to NULL instead.
> > > > This way we avoid calling a callback on a hot path needlessly.
> > > It's make sense. So just follow the logic you mentioned above?
> > >  
> > Yes.
> > 
> > > >>  static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
> > > >>  {
> > > >>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > > >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > >> index 0baa90d..0981100 100644
> > > >> --- a/arch/x86/kvm/x86.c
> > > >> +++ b/arch/x86/kvm/x86.c
> > > >> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
> > > >>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 				   
> > > >>  struct kvm_lapic_state *s) { +	kvm_x86_ops->sync_pir_to_irr(vcpu);
> > > >>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
> > > >>  
> > > >>  	return 0; @@ -5699,6 +5700,7 @@ static int vcpu_enter_guest(struct
> > > >>  kvm_vcpu *vcpu) 	}
> > > >>  
> > > >>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> > > >>  +		kvm_x86_ops->sync_pir_to_irr(vcpu); 		inject_pending_event(vcpu);
> > > >>  
> > > >>  		/* enable NMI/IRQ window open exits if needed */
> > > >> @@ -5741,6 +5743,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > > >> 
> > > >>  	local_irq_disable();
> > > >> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> > > >> +
> > > > Why is this separate from pir_to_irr syncing?
> > > This is the result of discussion with Marcelo.
> > > It is more reasonable to put it here to avoid unnecessary posted interrupt between:
> > > 
> > > vcpu->mode = IN_GUEST_MODE;
> > > 
> > > <--interrupt may arrived here and this is unnecessary.
> > > 
> > > local_irq_disable();
> > > 
> > 
> > But this still can happen as far as I see:
> > 
> > vcpu0                                         vcpu1: 
> > pi_test_and_set_pir()
> > kvm_make_request(KVM_REQ_EVENT)
> >                                             if (KVM_REQ_EVENT)
> >                                                    sync_pir_to_irr()
> 						^^^^^^^^^^^^^^^^^^
> >                                             vcpu->mode = IN_GUEST_MODE;
> > if (vcpu->mode == IN_GUEST_MODE)
> >   if (!pi_test_and_set_on())
> >     apic->send_IPI_mask()
> >                                             --> IPI arrives here
> >                                             local_irq_disable();
> >                                             posted_intr_clear_on()
> > 
> > 
> > May be move vcpu->mode = IN_GUEST_MODE after local_irq_disable()?
> 
> Scenario is correct because injected PIR has been synced to IRR before
> guest entry.
I do not say it is not. Yang said that the code tries to prevent this
from happening, I showed that this still can happen. There is not harm,
just useless IPI.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti March 19, 2013, 3:19 p.m. UTC | #12
On Tue, Mar 19, 2013 at 05:12:55PM +0200, Gleb Natapov wrote:
> On Tue, Mar 19, 2013 at 04:51:04PM +0200, Gleb Natapov wrote:
> > On Tue, Mar 19, 2013 at 01:59:21PM +0000, Zhang, Yang Z wrote:
> > > Gleb Natapov wrote on 2013-03-19:
> > > > On Tue, Mar 19, 2013 at 12:42:01PM +0000, Zhang, Yang Z wrote:
> > > >>>>>>  	local_irq_disable();
> > > >>>>>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> > > >>>>>> +
> > > >>>>> Why is this separate from pir_to_irr syncing?
> > > >>>> This is the result of discussion with Marcelo. It is more reasonable to
> > > >>>> put it here to avoid unnecessary posted interrupt between:
> > > >>>> 
> > > >>>> vcpu->mode = IN_GUEST_MODE;
> > > >>>> 
> > > >>>> <--interrupt may arrived here and this is unnecessary.
> > > >>>> 
> > > >>>> local_irq_disable();
> > > >>>> 
> > > >>> 
> > > >>> But this still can happen as far as I see:
> > > >>> 
> > > >>> vcpu0                                         vcpu1:
> > > >>> pi_test_and_set_pir() kvm_make_request(KVM_REQ_EVENT)
> > > >>>                                             if (KVM_REQ_EVENT)
> > > >>>                                                    sync_pir_to_irr()
> > > >>>                                             vcpu->mode =
> > > >>> IN_GUEST_MODE;
> > > >>> if (vcpu->mode == IN_GUEST_MODE)
> > > >>>   if (!pi_test_and_set_on())
> > > >>>     apic->send_IPI_mask()
> > > >>>                                             --> IPI arrives here
> > > >>>                                             local_irq_disable();
> > > >>>                                             posted_intr_clear_on()
> > > >> Current solution is trying to block other Posted Interrupt from other VCPUs at
> > > > same time. It only mitigates it but cannot solve it. The case you mentioned still
> > > > exists but it should be rare.
> > > >> 
> > > > I am not sure I follow. What scenario exactly are you talking about. I
> > > > looked over past discussion about it and saw that Marcelo gives an
> > > > example how IPI can be lost, but I think that's because we set "on" bit
> > > > after KVM_REQ_EVENT:
> > > The IPI will not lost in his example(he misread the patch). 
> > > 
> > > > cpu0                                    cpu1            vcpu0
> > > > test_and_set_bit(PIR-A) set KVM_REQ_EVENT
> > > >                                                         process
> > > >                                                         REQ_EVENT
> > > >                                                         PIR-A->IRR
> > > > 
> > > > vcpu->mode=IN_GUEST
> > > > 
> > > > if (vcpu0->guest_mode)
> > > >         if (!t_a_s_bit(PIR notif))
> > > >                 send IPI
> > > > linux_pir_handler
> > > > 
> > > >                                         t_a_s_b(PIR-B)=1
> > > >                                         no PIR IPI sent
> > > > 
> > > > But what if on delivery we do:
> > > > pi_test_and_set_pir()
> > > > r = pi_test_and_set_on()
> > > > kvm_make_request(KVM_REQ_EVENT)
> > > > if (!r)
> > > >    send_IPI_mask() else kvm_vcpu_kick()
> > > > And on vcpu entry we do:
> > > > if (kvm_check_request(KVM_REQ_EVENT)
> > > >  if (test_and_clear_bit(on))
> > > >    kvm_apic_update_irr()
> > > > What are the downsides? Can we lost interrupts this way?
> > > Need to check guest mode before sending IPI. Otherwise hypervisor may receive IPI.
> > Of course, forget it. So if (!r) should be if (!r && mode == IN_GUEST)
> > 
> > > I think current logic is ok. Only problem is that when to clear Outstanding Notification bit. Actually I prefer your suggestion to clear it before sync_pir_irr. But Marcelo prefer to clear ON bit after disabling irq.
> > Marcelo what is the problem with the logic above?
> > 
> Just to clarify the advantages that I see are: one less callback, no
> need to sync pir to irr on each event and, arguably, a little bit
> simpler logic.

See the previous argument: should never enter guest mode with PIR ON bit
set. With logic above:  

context1				context2                      context3
					set_bit(PIR-1)                   
					r = pi_test_and_set_on()	set_bit(PIR-40)
					set_bit(KVM_REQ_EVENT)
if (kvm_check_request(KVM_REQ_EVENT)	
 if (test_and_clear_bit(on))

   kvm_apic_update_irr()						r = pi_test_and_set_on()

guest entry with PIR ON=1


Thats the reason for unconditional clearing on guest entry: it is easy
to verify its correct. I understand and agree the callback (and VMWRITE)
is not nice.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 19, 2013, 3:21 p.m. UTC | #13
On Tue, Mar 19, 2013 at 12:13:11PM -0300, Marcelo Tosatti wrote:
> On Tue, Mar 19, 2013 at 03:29:24PM +0200, Gleb Natapov wrote:
> > On Tue, Mar 19, 2013 at 12:42:01PM +0000, Zhang, Yang Z wrote:
> > > >>>>  	local_irq_disable();
> > > >>>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> > > >>>> +
> > > >>> Why is this separate from pir_to_irr syncing?
> > > >> This is the result of discussion with Marcelo. It is more reasonable to
> > > >> put it here to avoid unnecessary posted interrupt between:
> > > >> 
> > > >> vcpu->mode = IN_GUEST_MODE;
> > > >> 
> > > >> <--interrupt may arrived here and this is unnecessary.
> > > >> 
> > > >> local_irq_disable();
> > > >> 
> > > > 
> > > > But this still can happen as far as I see:
> > > > 
> > > > vcpu0                                         vcpu1:
> > > > pi_test_and_set_pir() kvm_make_request(KVM_REQ_EVENT)
> > > >                                             if (KVM_REQ_EVENT)
> > > >                                                    sync_pir_to_irr()
> > > >                                             vcpu->mode =
> > > > IN_GUEST_MODE;
> > > > if (vcpu->mode == IN_GUEST_MODE)
> > > >   if (!pi_test_and_set_on())
> > > >     apic->send_IPI_mask()
> > > >                                             --> IPI arrives here
> > > >                                             local_irq_disable();
> > > >                                             posted_intr_clear_on()
> > > Current solution is trying to block other Posted Interrupt from other VCPUs at same time. It only mitigates it but cannot solve it. The case you mentioned still exists but it should be rare.
> > > 
> > I am not sure I follow. What scenario exactly are you talking about. I
> > looked over past discussion about it and saw that Marcelo gives an
> > example how IPI can be lost, but I think that's because we set "on" bit
> > after KVM_REQ_EVENT:
> > 
> > cpu0                                    cpu1            vcpu0
> > test_and_set_bit(PIR-A)
> > set KVM_REQ_EVENT
> >                                                         process REQ_EVENT
> >                                                         PIR-A->IRR
> > 
> >                                                         vcpu->mode=IN_GUEST
> > 
> > if (vcpu0->guest_mode)
> >         if (!t_a_s_bit(PIR notif))
> >                 send IPI
> >                                                         linux_pir_handler
> > 
> >                                         t_a_s_b(PIR-B)=1
> >                                         no PIR IPI sent
> > 
> > 
> > But what if on delivery we do:
> > pi_test_and_set_pir()
> > r = pi_test_and_set_on()
> > kvm_make_request(KVM_REQ_EVENT)
> > if (!r)
> >    send_IPI_mask()
> > else
> >    kvm_vcpu_kick()
> > 
> > And on vcpu entry we do:
> > if (kvm_check_request(KVM_REQ_EVENT)
> >  if (test_and_clear_bit(on))
> >    kvm_apic_update_irr()
> > 
> > What are the downsides? Can we lost interrupts this way?
> 
> You should not ever enter guest mode with ON bit set on PIR (because that will
> prevent PIR IPI from waking up interrupt injection logic). This is why
> the ON bit should be cleared on VM entry.
I agree that we should not, but I claim that this will not happen since
KVM_REQ_EVENT will prevent guest entry with ON bit set.

> 
> > > > May be move vcpu->mode = IN_GUEST_MODE after local_irq_disable()?
> > > Yes, this will solve it. But I am not sure whether it will introduce any regressions. Is there any check relies on this sequence?
> > >  	
> > Do not think so.
> 
> Can't see what is the problem with the code as it is?
> 
Just the useless IPI that can be prevented. I agree that this is not a
big deal unless shown otherwise.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti March 19, 2013, 3:27 p.m. UTC | #14
On Tue, Mar 19, 2013 at 12:19:55PM -0300, Marcelo Tosatti wrote:
> See the previous argument: should never enter guest mode with PIR ON bit
> set. With logic above:  
> 
> context1				context2                      context3
> 					set_bit(PIR-1)                   
> 					r = pi_test_and_set_on()	set_bit(PIR-40)
> 					set_bit(KVM_REQ_EVENT)
> if (kvm_check_request(KVM_REQ_EVENT)	
>  if (test_and_clear_bit(on))
> 
>    kvm_apic_update_irr()						r = pi_test_and_set_on()
> 
> guest entry with PIR ON=1
> 
> 
> Thats the reason for unconditional clearing on guest entry: it is easy
> to verify its correct. I understand and agree the callback (and VMWRITE)
> is not nice.

Re: KVM_REQ_EVENT setting after set_bit(KVM_REQ_EVENT) assures no guest
entry with PIR ON=1.

Might be, would have to verify. Its trickier though. Maybe add a FIXME:
to the callback and remove it later.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 19, 2013, 3:30 p.m. UTC | #15
On Tue, Mar 19, 2013 at 12:19:55PM -0300, Marcelo Tosatti wrote:
> On Tue, Mar 19, 2013 at 05:12:55PM +0200, Gleb Natapov wrote:
> > On Tue, Mar 19, 2013 at 04:51:04PM +0200, Gleb Natapov wrote:
> > > On Tue, Mar 19, 2013 at 01:59:21PM +0000, Zhang, Yang Z wrote:
> > > > Gleb Natapov wrote on 2013-03-19:
> > > > > On Tue, Mar 19, 2013 at 12:42:01PM +0000, Zhang, Yang Z wrote:
> > > > >>>>>>  	local_irq_disable();
> > > > >>>>>> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> > > > >>>>>> +
> > > > >>>>> Why is this separate from pir_to_irr syncing?
> > > > >>>> This is the result of discussion with Marcelo. It is more reasonable to
> > > > >>>> put it here to avoid unnecessary posted interrupt between:
> > > > >>>> 
> > > > >>>> vcpu->mode = IN_GUEST_MODE;
> > > > >>>> 
> > > > >>>> <--interrupt may arrived here and this is unnecessary.
> > > > >>>> 
> > > > >>>> local_irq_disable();
> > > > >>>> 
> > > > >>> 
> > > > >>> But this still can happen as far as I see:
> > > > >>> 
> > > > >>> vcpu0                                         vcpu1:
> > > > >>> pi_test_and_set_pir() kvm_make_request(KVM_REQ_EVENT)
> > > > >>>                                             if (KVM_REQ_EVENT)
> > > > >>>                                                    sync_pir_to_irr()
> > > > >>>                                             vcpu->mode =
> > > > >>> IN_GUEST_MODE;
> > > > >>> if (vcpu->mode == IN_GUEST_MODE)
> > > > >>>   if (!pi_test_and_set_on())
> > > > >>>     apic->send_IPI_mask()
> > > > >>>                                             --> IPI arrives here
> > > > >>>                                             local_irq_disable();
> > > > >>>                                             posted_intr_clear_on()
> > > > >> Current solution is trying to block other Posted Interrupt from other VCPUs at
> > > > > same time. It only mitigates it but cannot solve it. The case you mentioned still
> > > > > exists but it should be rare.
> > > > >> 
> > > > > I am not sure I follow. What scenario exactly are you talking about. I
> > > > > looked over past discussion about it and saw that Marcelo gives an
> > > > > example how IPI can be lost, but I think that's because we set "on" bit
> > > > > after KVM_REQ_EVENT:
> > > > The IPI will not lost in his example(he misread the patch). 
> > > > 
> > > > > cpu0                                    cpu1            vcpu0
> > > > > test_and_set_bit(PIR-A) set KVM_REQ_EVENT
> > > > >                                                         process
> > > > >                                                         REQ_EVENT
> > > > >                                                         PIR-A->IRR
> > > > > 
> > > > > vcpu->mode=IN_GUEST
> > > > > 
> > > > > if (vcpu0->guest_mode)
> > > > >         if (!t_a_s_bit(PIR notif))
> > > > >                 send IPI
> > > > > linux_pir_handler
> > > > > 
> > > > >                                         t_a_s_b(PIR-B)=1
> > > > >                                         no PIR IPI sent
> > > > > 
> > > > > But what if on delivery we do:
> > > > > pi_test_and_set_pir()
> > > > > r = pi_test_and_set_on()
> > > > > kvm_make_request(KVM_REQ_EVENT)
> > > > > if (!r)
> > > > >    send_IPI_mask() else kvm_vcpu_kick()
> > > > > And on vcpu entry we do:
> > > > > if (kvm_check_request(KVM_REQ_EVENT)
> > > > >  if (test_and_clear_bit(on))
> > > > >    kvm_apic_update_irr()
> > > > > What are the downsides? Can we lost interrupts this way?
> > > > Need to check guest mode before sending IPI. Otherwise hypervisor may receive IPI.
> > > Of course, forget it. So if (!r) should be if (!r && mode == IN_GUEST)
> > > 
> > > > I think current logic is ok. Only problem is that when to clear Outstanding Notification bit. Actually I prefer your suggestion to clear it before sync_pir_irr. But Marcelo prefer to clear ON bit after disabling irq.
> > > Marcelo what is the problem with the logic above?
> > > 
> > Just to clarify the advantages that I see are: one less callback, no
> > need to sync pir to irr on each event and, arguably, a little bit
> > simpler logic.
> 
> See the previous argument: should never enter guest mode with PIR ON bit
> set. With logic above:  
> 
> context1				context2                      context3
> 					set_bit(PIR-1)                   
> 					r = pi_test_and_set_on()	set_bit(PIR-40)
> 					set_bit(KVM_REQ_EVENT)
> if (kvm_check_request(KVM_REQ_EVENT)	
>  if (test_and_clear_bit(on))
> 
>    kvm_apic_update_irr()						r = pi_test_and_set_on()
> 
> guest entry with PIR ON=1
> 
> 
But that's OK since context3 will send IPI and PIR will be processed by
CPU. Let me amend myself: we should not enter guest mode with ON bit set
and no PI IPI pending in LAPIC.

> Thats the reason for unconditional clearing on guest entry: it is easy
> to verify its correct. I understand and agree the callback (and VMWRITE)
> is not nice.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 19, 2013, 4:10 p.m. UTC | #16
On Tue, Mar 19, 2013 at 12:27:38PM -0300, Marcelo Tosatti wrote:
> On Tue, Mar 19, 2013 at 12:19:55PM -0300, Marcelo Tosatti wrote:
> > See the previous argument: should never enter guest mode with PIR ON bit
> > set. With logic above:  
> > 
> > context1				context2                      context3
> > 					set_bit(PIR-1)                   
> > 					r = pi_test_and_set_on()	set_bit(PIR-40)
> > 					set_bit(KVM_REQ_EVENT)
> > if (kvm_check_request(KVM_REQ_EVENT)	
> >  if (test_and_clear_bit(on))
> > 
> >    kvm_apic_update_irr()						r = pi_test_and_set_on()
> > 
> > guest entry with PIR ON=1
> > 
> > 
> > Thats the reason for unconditional clearing on guest entry: it is easy
> > to verify its correct. I understand and agree the callback (and VMWRITE)
> > is not nice.
> 
> Re: KVM_REQ_EVENT setting after set_bit(KVM_REQ_EVENT) assures no guest
> entry with PIR ON=1.
> 
> Might be, would have to verify. Its trickier though. Maybe add a FIXME:
> to the callback and remove it later.
We have time still. RTC series is not ready yet. I'll think hard and try
to poke holes in the logic in this patch and you do the same for what I
propose.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 20, 2013, 11:47 a.m. UTC | #17
Gleb Natapov wrote on 2013-03-20:
> On Tue, Mar 19, 2013 at 12:27:38PM -0300, Marcelo Tosatti wrote:
>> On Tue, Mar 19, 2013 at 12:19:55PM -0300, Marcelo Tosatti wrote:
>>> See the previous argument: should never enter guest mode with PIR ON bit
>>> set. With logic above:
>>> 
>>> context1				context2                      context3
>>> 					set_bit(PIR-1)
>>> 					r = pi_test_and_set_on()	set_bit(PIR-40)
>>> 					set_bit(KVM_REQ_EVENT)
>>> if (kvm_check_request(KVM_REQ_EVENT)
>>>  if (test_and_clear_bit(on))
>>>    kvm_apic_update_irr()						r =
> pi_test_and_set_on()
>>> 
>>> guest entry with PIR ON=1
>>> 
>>> 
>>> Thats the reason for unconditional clearing on guest entry: it is easy
>>> to verify its correct. I understand and agree the callback (and VMWRITE)
>>> is not nice.
>> 
>> Re: KVM_REQ_EVENT setting after set_bit(KVM_REQ_EVENT) assures no guest
>> entry with PIR ON=1.
>> 
>> Might be, would have to verify. Its trickier though. Maybe add a FIXME:
>> to the callback and remove it later.
> We have time still. RTC series is not ready yet. I'll think hard and try
> to poke holes in the logic in this patch and you do the same for what I
> propose.
Any thought? As far as I see, the two solutions are ok. It's hard to say which is better. But clear ON bit when sync_pir_irr should be more clear and close to hardware's behavior.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 20, 2013, 11:49 a.m. UTC | #18
On Wed, Mar 20, 2013 at 11:47:49AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-03-20:
> > On Tue, Mar 19, 2013 at 12:27:38PM -0300, Marcelo Tosatti wrote:
> >> On Tue, Mar 19, 2013 at 12:19:55PM -0300, Marcelo Tosatti wrote:
> >>> See the previous argument: should never enter guest mode with PIR ON bit
> >>> set. With logic above:
> >>> 
> >>> context1				context2                      context3
> >>> 					set_bit(PIR-1)
> >>> 					r = pi_test_and_set_on()	set_bit(PIR-40)
> >>> 					set_bit(KVM_REQ_EVENT)
> >>> if (kvm_check_request(KVM_REQ_EVENT)
> >>>  if (test_and_clear_bit(on))
> >>>    kvm_apic_update_irr()						r =
> > pi_test_and_set_on()
> >>> 
> >>> guest entry with PIR ON=1
> >>> 
> >>> 
> >>> Thats the reason for unconditional clearing on guest entry: it is easy
> >>> to verify its correct. I understand and agree the callback (and VMWRITE)
> >>> is not nice.
> >> 
> >> Re: KVM_REQ_EVENT setting after set_bit(KVM_REQ_EVENT) assures no guest
> >> entry with PIR ON=1.
> >> 
> >> Might be, would have to verify. Its trickier though. Maybe add a FIXME:
> >> to the callback and remove it later.
> > We have time still. RTC series is not ready yet. I'll think hard and try
> > to poke holes in the logic in this patch and you do the same for what I
> > propose.
> Any thought? As far as I see, the two solutions are ok. It's hard to say which is better. But clear ON bit when sync_pir_irr should be more clear and close to hardware's behavior.
> 
Lets go with it unless we see why it will not work.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 20, 2013, 11:52 a.m. UTC | #19
Gleb Natapov wrote on 2013-03-20:
> On Wed, Mar 20, 2013 at 11:47:49AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-03-20:
>>> On Tue, Mar 19, 2013 at 12:27:38PM -0300, Marcelo Tosatti wrote:
>>>> On Tue, Mar 19, 2013 at 12:19:55PM -0300, Marcelo Tosatti wrote:
>>>>> See the previous argument: should never enter guest mode with PIR ON
>>>>> bit set. With logic above:
>>>>> 
>>>>> context1				context2                      context3
>>>>> 					set_bit(PIR-1)
>>>>> 					r = pi_test_and_set_on()	set_bit(PIR-40)
>>>>> 					set_bit(KVM_REQ_EVENT)
>>>>> if (kvm_check_request(KVM_REQ_EVENT)
>>>>>  if (test_and_clear_bit(on))
>>>>>    kvm_apic_update_irr()						r =
>>> pi_test_and_set_on()
>>>>> 
>>>>> guest entry with PIR ON=1
>>>>> 
>>>>> 
>>>>> Thats the reason for unconditional clearing on guest entry: it is easy
>>>>> to verify its correct. I understand and agree the callback (and VMWRITE)
>>>>> is not nice.
>>>> 
>>>> Re: KVM_REQ_EVENT setting after set_bit(KVM_REQ_EVENT) assures no
>>>> guest entry with PIR ON=1.
>>>> 
>>>> Might be, would have to verify. Its trickier though. Maybe add a FIXME:
>>>> to the callback and remove it later.
>>> We have time still. RTC series is not ready yet. I'll think hard and try
>>> to poke holes in the logic in this patch and you do the same for what I
>>> propose.
>> Any thought? As far as I see, the two solutions are ok. It's hard to say which is
> better. But clear ON bit when sync_pir_irr should be more clear and close to
> hardware's behavior.
>> 
> Lets go with it unless we see why it will not work.
Sure.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 484bc87..5179988 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -81,7 +81,8 @@  int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 	if (kvm_cpu_has_extint(v))
 		return 1;
 
-	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
+	return (kvm_apic_has_interrupt(v) != -1) ||
+		kvm_hwapic_has_interrupt(v);
 }
 EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b3ea50e..46c7310 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -713,7 +713,10 @@  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		} else
 			apic_clear_vector(vector, apic->regs + APIC_TMR);
 
-		result = !apic_test_and_set_irr(vector, apic);
+		result = 1;
+		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector))
+			result = !apic_test_and_set_irr(vector, apic);
+
 		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
 					  trig_mode, vector, !result);
 		if (!result) {
@@ -723,8 +726,10 @@  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			break;
 		}
 
-		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		kvm_vcpu_kick(vcpu);
+		if (!kvm_x86_ops->vm_has_apicv(vcpu->kvm)) {
+			kvm_make_request(KVM_REQ_EVENT, vcpu);
+			kvm_vcpu_kick(vcpu);
+		}
 		break;
 
 	case APIC_DM_REMRD:
@@ -1604,6 +1609,11 @@  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 	return highest_irr;
 }
 
+bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops->hwapic_has_interrupt(vcpu);
+}
+
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
 	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e5327be..c6abc63 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -37,6 +37,7 @@  int kvm_create_lapic(struct kvm_vcpu *vcpu);
 void kvm_free_lapic(struct kvm_vcpu *vcpu);
 
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
+bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
 void kvm_lapic_reset(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0b5a8ae..48a2239 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3932,6 +3932,17 @@  static void vmx_posted_intr_clear_on(struct kvm_vcpu *vcpu)
 	clear_bit(POSTED_INTR_ON, (unsigned long *)&vmx->pi_desc.u.control);
 }
 
+/*
+ * Send interrupt to vcpu via posted interrupt way.
+ * Return false if posted interrupt is not supported and the caller will
+ * roll back to old way(via set vIRR).
+ * Return true if posted interrupt is avalialbe, the interrupt is set
+ * in pir(posted interrupt requests):
+ * 1. If target vcpu is running(non-root mode), send posted interrupt
+ * notification to vcpu and hardware will sync pir to vIRR atomically.
+ * 2. If target vcpu isn't running(root mode), kick it to pick up the
+ * interrupt from pir in next vmentry.
+ */
 static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0baa90d..0981100 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2679,6 +2679,7 @@  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
+	kvm_x86_ops->sync_pir_to_irr(vcpu);
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 
 	return 0;
@@ -5699,6 +5700,7 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+		kvm_x86_ops->sync_pir_to_irr(vcpu);
 		inject_pending_event(vcpu);
 
 		/* enable NMI/IRQ window open exits if needed */
@@ -5741,6 +5743,8 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	local_irq_disable();
 
+	kvm_x86_ops->posted_intr_clear_on(vcpu);
+
 	if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
 	    || need_resched() || signal_pending(current)) {
 		vcpu->mode = OUTSIDE_GUEST_MODE;