diff mbox

[v7,4/7] KVM: Add reset/restore rtc_status support

Message ID 1364776816-30772-5-git-send-email-yang.z.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhang, Yang Z April 1, 2013, 12:40 a.m. UTC
From: Yang Zhang <yang.z.zhang@Intel.com>

Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
---
 arch/x86/kvm/lapic.c |    9 +++++++++
 arch/x86/kvm/lapic.h |    2 ++
 virt/kvm/ioapic.c    |   43 +++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/ioapic.h    |    1 +
 4 files changed, 55 insertions(+), 0 deletions(-)

Comments

Gleb Natapov April 4, 2013, 11:58 a.m. UTC | #1
On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
> From: Yang Zhang <yang.z.zhang@Intel.com>
> 
> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> ---
>  arch/x86/kvm/lapic.c |    9 +++++++++
>  arch/x86/kvm/lapic.h |    2 ++
>  virt/kvm/ioapic.c    |   43 +++++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/ioapic.h    |    1 +
>  4 files changed, 55 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 96ab160..9c041fa 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>  }
>  
> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
> +{
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +
> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
> +		apic_test_vector(vector, apic->regs + APIC_IRR);
> +}
> +
>  static inline void apic_set_vector(int vec, void *bitmap)
>  {
>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
>  	apic->highest_isr_cache = -1;
>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> +	kvm_rtc_irq_restore(vcpu);
>  }
>  
>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 967519c..004d2ad 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
>  	return vcpu->arch.apic->pending_events;
>  }
>  
> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
> +
>  #endif
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index 8664812..0b12b17 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -90,6 +90,47 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
>  	return result;
>  }
>  
> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
> +{
> +	ioapic->rtc_status.pending_eoi = 0;
> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
> +}
> +
> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
> +{
> +	struct kvm_vcpu *vcpu;
> +	int vector, i, pending_eoi = 0;
> +
> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
> +		return;
> +
> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
> +			pending_eoi++;
> +			__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
You should cleat dest_map at the beginning to get rid of stale bits.

> +		}
> +	}
> +	ioapic->rtc_status.pending_eoi = pending_eoi;
> +}
> +
> +void kvm_rtc_irq_restore(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
> +	int vector;
> +
> +	if (!ioapic)
> +		return;
> +
Can this be called if ioapic == NULL?
Should check for if (RTC_GSI >= IOAPIC_NUM_PINS) here too.

> +	spin_lock(&ioapic->lock);
> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
> +	if (kvm_apic_pending_eoi(vcpu, vector)) {
> +		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> +		ioapic->rtc_status.pending_eoi++;
The bit may have been set already. The logic should be:

new_val = kvm_apic_pending_eoi(vcpu, vector)
old_val = set_bit(vcpu_id, dest_map)

if (new_val == old_val)
	return;

if (new_val) {
	__set_bit(vcpu_id, dest_map);
	pending_eoi++;
} else {
	__clear_bit(vcpu_id, dest_map);
	pending_eoi--;
}

The naming of above two functions are not good either. Call
them something like kvm_rtc_eoi_tracking_restore_all() and
kvm_rtc_eoi_tracking_restore_one().  And _all should call _one() for
each vcpu. Make __rtc_irq_eoi_tracking_restore_one() that does not
take ioapic lock and call it from kvm_rtc_eoi_tracking_restore_one()
surrounded by locks.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z April 7, 2013, 2:30 a.m. UTC | #2
Gleb Natapov wrote on 2013-04-04:
> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
>> From: Yang Zhang <yang.z.zhang@Intel.com>
>> 
>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>> ---
>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h |    2 ++
>>  virt/kvm/ioapic.c    |   43
>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h    |   
>>  1 + 4 files changed, 55 insertions(+), 0 deletions(-)
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index 96ab160..9c041fa 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>  }
>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
>> +{
>> +	struct kvm_lapic *apic = vcpu->arch.apic;
>> +
>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
>> +}
>> +
>>  static inline void apic_set_vector(int vec, void *bitmap)
>>  {
>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu
> *vcpu,
>>  	apic->highest_isr_cache = -1;
>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT, vcpu);
>>  +	kvm_rtc_irq_restore(vcpu); }
>>  
>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>> index 967519c..004d2ad 100644
>> --- a/arch/x86/kvm/lapic.h
>> +++ b/arch/x86/kvm/lapic.h
>> @@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct
> kvm_vcpu *vcpu)
>>  	return vcpu->arch.apic->pending_events;
>>  }
>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>> +
>>  #endif
>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>> index 8664812..0b12b17 100644
>> --- a/virt/kvm/ioapic.c
>> +++ b/virt/kvm/ioapic.c
>> @@ -90,6 +90,47 @@ static unsigned long ioapic_read_indirect(struct
> kvm_ioapic *ioapic,
>>  	return result;
>>  }
>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
>> +{
>> +	ioapic->rtc_status.pending_eoi = 0;
>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
>> +}
>> +
>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
>> +{
>> +	struct kvm_vcpu *vcpu;
>> +	int vector, i, pending_eoi = 0;
>> +
>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
>> +		return;
>> +
>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
>> +			pending_eoi++;
>> +			__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> You should cleat dest_map at the beginning to get rid of stale bits.
I thought kvm_set_ioapic is called only after save/restore or migration. And the ioapic should be reset successfully before call it. So the dest_map is empty before call rtc_irq_restore().
But it is possible kvm_set_ioapic is called beside save/restore or migration. Right?

> 
>> +		}
>> +	}
>> +	ioapic->rtc_status.pending_eoi = pending_eoi;
>> +}
>> +
>> +void kvm_rtc_irq_restore(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
>> +	int vector;
>> +
>> +	if (!ioapic)
>> +		return;
>> +
> Can this be called if ioapic == NULL?
Yes. IIRC, unit test will test lapic function without ioapic.

> Should check for if (RTC_GSI >= IOAPIC_NUM_PINS) here too.
Not necessary. kvm_rtc_irq_restore is called from "arch/x86/" and we have the defination:
#ifdef CONFIG_X86
#define RTC_GSI 8

The check will be false always. As the logic you suggested below, this check is necessary for _all() not _one();

> 
>> +	spin_lock(&ioapic->lock);
>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
>> +	if (kvm_apic_pending_eoi(vcpu, vector)) {
>> +		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
>> +		ioapic->rtc_status.pending_eoi++;
> The bit may have been set already. The logic should be:
Right.

>
> 
> new_val = kvm_apic_pending_eoi(vcpu, vector)
> old_val = set_bit(vcpu_id, dest_map)
> 
> if (new_val == old_val)
> 	return;
> 
> if (new_val) {
> 	__set_bit(vcpu_id, dest_map);
> 	pending_eoi++;
> } else {
> 	__clear_bit(vcpu_id, dest_map);
> 	pending_eoi--;
> }
> 
> The naming of above two functions are not good either. Call
> them something like kvm_rtc_eoi_tracking_restore_all() and
> kvm_rtc_eoi_tracking_restore_one().  And _all should call _one() for
> each vcpu. Make __rtc_irq_eoi_tracking_restore_one() that does not
> take ioapic lock and call it from kvm_rtc_eoi_tracking_restore_one()
> surrounded by locks.
Ok. Just confirm whether I am understanding correct:

kvm_rtc_eoi_tracking_restore_all():
{
for_each_vcpu:
	kvm_rtc_eoi_tracking_restore_one():
}

kvm_rtc_eoi_tracking_restore_one():
{
lock();
__rtc_irq_eoi_tracking_restore_one():
unlock();
}

kvm_set_ioapic()
{
kvm_rtc_eoi_tracking_restore_all();
}

kvm_apic_post_state_restore()
{
kvm_rtc_eoi_tracking_restore_one();
}

> 
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov April 7, 2013, 9:17 a.m. UTC | #3
On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-04-04:
> > On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
> >> From: Yang Zhang <yang.z.zhang@Intel.com>
> >> 
> >> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >> ---
> >>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h |    2 ++
> >>  virt/kvm/ioapic.c    |   43
> >>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h    |   
> >>  1 + 4 files changed, 55 insertions(+), 0 deletions(-)
> >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >> index 96ab160..9c041fa 100644
> >> --- a/arch/x86/kvm/lapic.c
> >> +++ b/arch/x86/kvm/lapic.c
> >> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
> >>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>  }
> >> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
> >> +{
> >> +	struct kvm_lapic *apic = vcpu->arch.apic;
> >> +
> >> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
> >> +		apic_test_vector(vector, apic->regs + APIC_IRR);
> >> +}
> >> +
> >>  static inline void apic_set_vector(int vec, void *bitmap)
> >>  {
> >>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu
> > *vcpu,
> >>  	apic->highest_isr_cache = -1;
> >>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
> >>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT, vcpu);
> >>  +	kvm_rtc_irq_restore(vcpu); }
> >>  
> >>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> >> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >> index 967519c..004d2ad 100644
> >> --- a/arch/x86/kvm/lapic.h
> >> +++ b/arch/x86/kvm/lapic.h
> >> @@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct
> > kvm_vcpu *vcpu)
> >>  	return vcpu->arch.apic->pending_events;
> >>  }
> >> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
> >> +
> >>  #endif
> >> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >> index 8664812..0b12b17 100644
> >> --- a/virt/kvm/ioapic.c
> >> +++ b/virt/kvm/ioapic.c
> >> @@ -90,6 +90,47 @@ static unsigned long ioapic_read_indirect(struct
> > kvm_ioapic *ioapic,
> >>  	return result;
> >>  }
> >> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
> >> +{
> >> +	ioapic->rtc_status.pending_eoi = 0;
> >> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
> >> +}
> >> +
> >> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
> >> +{
> >> +	struct kvm_vcpu *vcpu;
> >> +	int vector, i, pending_eoi = 0;
> >> +
> >> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
> >> +		return;
> >> +
> >> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
> >> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
> >> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
> >> +			pending_eoi++;
> >> +			__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> > You should cleat dest_map at the beginning to get rid of stale bits.
> I thought kvm_set_ioapic is called only after save/restore or migration. And the ioapic should be reset successfully before call it. So the dest_map is empty before call rtc_irq_restore().
> But it is possible kvm_set_ioapic is called beside save/restore or migration. Right?
> 
First of all userspace should not care when it calls kvm_set_ioapic()
the kernel need to do the right thing. Second, believe it or not,
kvm_ioapic_reset() is not called during system reset. Instead userspace
reset it by calling kvm_set_ioapic() with ioapic state after reset.

> > 
> >> +		}
> >> +	}
> >> +	ioapic->rtc_status.pending_eoi = pending_eoi;
> >> +}
> >> +
> >> +void kvm_rtc_irq_restore(struct kvm_vcpu *vcpu)
> >> +{
> >> +	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
> >> +	int vector;
> >> +
> >> +	if (!ioapic)
> >> +		return;
> >> +
> > Can this be called if ioapic == NULL?
> Yes. IIRC, unit test will test lapic function without ioapic.
Unit test is a guest code. This has nothing to do with a guest code.
Unit test is not the one who creates lapic.

> 
> > Should check for if (RTC_GSI >= IOAPIC_NUM_PINS) here too.
> Not necessary. kvm_rtc_irq_restore is called from "arch/x86/" and we have the defination:
> #ifdef CONFIG_X86
> #define RTC_GSI 8
> 
> The check will be false always. As the logic you suggested below, this check is necessary for _all() not _one();
OK.

> 
> > 
> >> +	spin_lock(&ioapic->lock);
> >> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
> >> +	if (kvm_apic_pending_eoi(vcpu, vector)) {
> >> +		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> >> +		ioapic->rtc_status.pending_eoi++;
> > The bit may have been set already. The logic should be:
> Right.
> 
> >
> > 
> > new_val = kvm_apic_pending_eoi(vcpu, vector)
> > old_val = set_bit(vcpu_id, dest_map)
> > 
> > if (new_val == old_val)
> > 	return;
> > 
> > if (new_val) {
> > 	__set_bit(vcpu_id, dest_map);
> > 	pending_eoi++;
> > } else {
> > 	__clear_bit(vcpu_id, dest_map);
> > 	pending_eoi--;
> > }
> > 
> > The naming of above two functions are not good either. Call
> > them something like kvm_rtc_eoi_tracking_restore_all() and
> > kvm_rtc_eoi_tracking_restore_one().  And _all should call _one() for
> > each vcpu. Make __rtc_irq_eoi_tracking_restore_one() that does not
> > take ioapic lock and call it from kvm_rtc_eoi_tracking_restore_one()
> > surrounded by locks.
> Ok. Just confirm whether I am understanding correct:
> 
> kvm_rtc_eoi_tracking_restore_all():
> {
> for_each_vcpu:
> 	kvm_rtc_eoi_tracking_restore_one():
        __rtc_irq_eoi_tracking_restore_one();
Since caller will have the lock already.

> }
> 
> kvm_rtc_eoi_tracking_restore_one():
> {
> lock();
> __rtc_irq_eoi_tracking_restore_one():
> unlock();
> }
> 
> kvm_set_ioapic()
> {
> kvm_rtc_eoi_tracking_restore_all();
> }
> 
> kvm_apic_post_state_restore()
> {
> kvm_rtc_eoi_tracking_restore_one();
> }
> 
Yes.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z April 7, 2013, 12:39 p.m. UTC | #4
Gleb Natapov wrote on 2013-04-07:
> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-04-04:
>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>> 
>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>> ---
>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h |    2 ++
>>>>  virt/kvm/ioapic.c    |   43
>>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h | 1 +
>>>>  4 files changed, 55 insertions(+), 0 deletions(-)
>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>> index 96ab160..9c041fa 100644
>>>> --- a/arch/x86/kvm/lapic.c
>>>> +++ b/arch/x86/kvm/lapic.c
>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>  }
>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
>>>> +{
>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
>>>> +
>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
>>>> +}
>>>> +
>>>>  static inline void apic_set_vector(int vec, void *bitmap)
>>>>  {
>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
> kvm_vcpu
>>> *vcpu,
>>>>  	apic->highest_isr_cache = -1;
>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
>>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
>>>>  
>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>> index 967519c..004d2ad 100644
>>>> --- a/arch/x86/kvm/lapic.h
>>>> +++ b/arch/x86/kvm/lapic.h
>>>> @@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct
>>> kvm_vcpu *vcpu)
>>>>  	return vcpu->arch.apic->pending_events;
>>>>  }
>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>>>> +
>>>>  #endif
>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>> index 8664812..0b12b17 100644
>>>> --- a/virt/kvm/ioapic.c
>>>> +++ b/virt/kvm/ioapic.c
>>>> @@ -90,6 +90,47 @@ static unsigned long ioapic_read_indirect(struct
>>> kvm_ioapic *ioapic,
>>>>  	return result;
>>>>  }
>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
>>>> +{
>>>> +	ioapic->rtc_status.pending_eoi = 0;
>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
>>>> +}
>>>> +
>>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
>>>> +{
>>>> +	struct kvm_vcpu *vcpu;
>>>> +	int vector, i, pending_eoi = 0;
>>>> +
>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
>>>> +		return;
>>>> +
>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
>>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
>>>> +			pending_eoi++;
>>>> +			__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
>>> You should cleat dest_map at the beginning to get rid of stale bits.
>> I thought kvm_set_ioapic is called only after save/restore or migration. And the
> ioapic should be reset successfully before call it. So the dest_map is empty
> before call rtc_irq_restore().
>> But it is possible kvm_set_ioapic is called beside save/restore or
>> migration. Right?
>> 
> First of all userspace should not care when it calls kvm_set_ioapic()
> the kernel need to do the right thing. Second, believe it or not,
> kvm_ioapic_reset() is not called during system reset. Instead userspace
> reset it by calling kvm_set_ioapic() with ioapic state after reset.
Ok. I see. As the logic you suggested, it will clear dest_map if no pending eoi in vcpu, so we don't need to do it again.

> 
>>> 
>>>> +		}
>>>> +	}
>>>> +	ioapic->rtc_status.pending_eoi = pending_eoi;
>>>> +}
>>>> +
>>>> +void kvm_rtc_irq_restore(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
>>>> +	int vector;
>>>> +
>>>> +	if (!ioapic)
>>>> +		return;
>>>> +
>>> Can this be called if ioapic == NULL?
>> Yes. IIRC, unit test will test lapic function without ioapic.
> Unit test is a guest code. This has nothing to do with a guest code.
> Unit test is not the one who creates lapic.
Ok. Then !ioapic is unnecessary.

>> 
>>> Should check for if (RTC_GSI >= IOAPIC_NUM_PINS) here too.
>> Not necessary. kvm_rtc_irq_restore is called from "arch/x86/" and we
>> have the defination: #ifdef CONFIG_X86 #define RTC_GSI 8
>> 
>> The check will be false always. As the logic you suggested below, this check is
> necessary for _all() not _one();
> OK.
> 
>> 
>>> 
>>>> +	spin_lock(&ioapic->lock);
>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
>>>> +	if (kvm_apic_pending_eoi(vcpu, vector)) {
>>>> +		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
>>>> +		ioapic->rtc_status.pending_eoi++;
>>> The bit may have been set already. The logic should be:
>> Right.
>> 
>>> 
>>> 
>>> new_val = kvm_apic_pending_eoi(vcpu, vector)
>>> old_val = set_bit(vcpu_id, dest_map)
>>> 
>>> if (new_val == old_val)
>>> 	return;
>>> 
>>> if (new_val) {
>>> 	__set_bit(vcpu_id, dest_map);
>>> 	pending_eoi++;
>>> } else {
>>> 	__clear_bit(vcpu_id, dest_map);
>>> 	pending_eoi--;
>>> }
>>> 
>>> The naming of above two functions are not good either. Call
>>> them something like kvm_rtc_eoi_tracking_restore_all() and
>>> kvm_rtc_eoi_tracking_restore_one().  And _all should call _one() for
>>> each vcpu. Make __rtc_irq_eoi_tracking_restore_one() that does not
>>> take ioapic lock and call it from kvm_rtc_eoi_tracking_restore_one()
>>> surrounded by locks.
>> Ok. Just confirm whether I am understanding correct:
>> 
>> kvm_rtc_eoi_tracking_restore_all():
>> {
>> for_each_vcpu:
>> 	kvm_rtc_eoi_tracking_restore_one():
>         __rtc_irq_eoi_tracking_restore_one();
> Since caller will have the lock already.
> 
>> }
>> 
>> kvm_rtc_eoi_tracking_restore_one():
>> {
>> lock();
>> __rtc_irq_eoi_tracking_restore_one():
>> unlock();
>> }
>> 
>> kvm_set_ioapic()
>> {
>> kvm_rtc_eoi_tracking_restore_all();
>> }
>> 
>> kvm_apic_post_state_restore()
>> {
>> kvm_rtc_eoi_tracking_restore_one();
>> }
>> 
> Yes.
> 
> --
> 			Gleb.


Best regards,
Yang

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov April 7, 2013, 12:42 p.m. UTC | #5
On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-04-07:
> > On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-04-04:
> >>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
> >>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>> 
> >>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>> ---
> >>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h |    2 ++
> >>>>  virt/kvm/ioapic.c    |   43
> >>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h | 1 +
> >>>>  4 files changed, 55 insertions(+), 0 deletions(-)
> >>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >>>> index 96ab160..9c041fa 100644
> >>>> --- a/arch/x86/kvm/lapic.c
> >>>> +++ b/arch/x86/kvm/lapic.c
> >>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
> >>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>>  }
> >>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
> >>>> +{
> >>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
> >>>> +
> >>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
> >>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
> >>>> +}
> >>>> +
> >>>>  static inline void apic_set_vector(int vec, void *bitmap)
> >>>>  {
> >>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
> > kvm_vcpu
> >>> *vcpu,
> >>>>  	apic->highest_isr_cache = -1;
> >>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
> >>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
> >>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
> >>>>  
> >>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> >>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >>>> index 967519c..004d2ad 100644
> >>>> --- a/arch/x86/kvm/lapic.h
> >>>> +++ b/arch/x86/kvm/lapic.h
> >>>> @@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct
> >>> kvm_vcpu *vcpu)
> >>>>  	return vcpu->arch.apic->pending_events;
> >>>>  }
> >>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
> >>>> +
> >>>>  #endif
> >>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >>>> index 8664812..0b12b17 100644
> >>>> --- a/virt/kvm/ioapic.c
> >>>> +++ b/virt/kvm/ioapic.c
> >>>> @@ -90,6 +90,47 @@ static unsigned long ioapic_read_indirect(struct
> >>> kvm_ioapic *ioapic,
> >>>>  	return result;
> >>>>  }
> >>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
> >>>> +{
> >>>> +	ioapic->rtc_status.pending_eoi = 0;
> >>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
> >>>> +}
> >>>> +
> >>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
> >>>> +{
> >>>> +	struct kvm_vcpu *vcpu;
> >>>> +	int vector, i, pending_eoi = 0;
> >>>> +
> >>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
> >>>> +		return;
> >>>> +
> >>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
> >>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
> >>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
> >>>> +			pending_eoi++;
> >>>> +			__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> >>> You should cleat dest_map at the beginning to get rid of stale bits.
> >> I thought kvm_set_ioapic is called only after save/restore or migration. And the
> > ioapic should be reset successfully before call it. So the dest_map is empty
> > before call rtc_irq_restore().
> >> But it is possible kvm_set_ioapic is called beside save/restore or
> >> migration. Right?
> >> 
> > First of all userspace should not care when it calls kvm_set_ioapic()
> > the kernel need to do the right thing. Second, believe it or not,
> > kvm_ioapic_reset() is not called during system reset. Instead userspace
> > reset it by calling kvm_set_ioapic() with ioapic state after reset.
> Ok. I see. As the logic you suggested, it will clear dest_map if no pending eoi in vcpu, so we don't need to do it again.
> 
You again rely on userspace doing thing in certain manner. What is
set_lapic() is never called? Kernel internal state have to be correct
after each ioctl call.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z April 7, 2013, 1:05 p.m. UTC | #6
Gleb Natapov wrote on 2013-04-07:
> On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-04-07:
>>> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2013-04-04:
>>>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>> 
>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>> ---
>>>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h |    2
>>>>>>  ++ virt/kvm/ioapic.c    |   43
>>>>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h | 1
>>>>>>  + 4 files changed, 55 insertions(+), 0 deletions(-)
>>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>>>> index 96ab160..9c041fa 100644
>>>>>> --- a/arch/x86/kvm/lapic.c
>>>>>> +++ b/arch/x86/kvm/lapic.c
>>>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void
> *bitmap)
>>>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>>  }
>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
>>>>>> +{
>>>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
>>>>>> +
>>>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
>>>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
>>>>>> +}
>>>>>> +
>>>>>>  static inline void apic_set_vector(int vec, void *bitmap)
>>>>>>  {
>>>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
>>> kvm_vcpu
>>>>> *vcpu,
>>>>>>  	apic->highest_isr_cache = -1;
>>>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>>>>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
>>>>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
>>>>>>  
>>>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
>>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>>>> index 967519c..004d2ad 100644
>>>>>> --- a/arch/x86/kvm/lapic.h
>>>>>> +++ b/arch/x86/kvm/lapic.h
>>>>>> @@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct
>>>>> kvm_vcpu *vcpu)
>>>>>>  	return vcpu->arch.apic->pending_events;
>>>>>>  }
>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>>>>>> +
>>>>>>  #endif
>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>>>> index 8664812..0b12b17 100644
>>>>>> --- a/virt/kvm/ioapic.c
>>>>>> +++ b/virt/kvm/ioapic.c
>>>>>> @@ -90,6 +90,47 @@ static unsigned long ioapic_read_indirect(struct
>>>>> kvm_ioapic *ioapic,
>>>>>>  	return result;
>>>>>>  }
>>>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
>>>>>> +{
>>>>>> +	ioapic->rtc_status.pending_eoi = 0;
>>>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
>>>>>> +}
>>>>>> +
>>>>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
>>>>>> +{
>>>>>> +	struct kvm_vcpu *vcpu;
>>>>>> +	int vector, i, pending_eoi = 0;
>>>>>> +
>>>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
>>>>>> +		return;
>>>>>> +
>>>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
>>>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
>>>>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
>>>>>> +			pending_eoi++;
>>>>>> +			__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
>>>>> You should cleat dest_map at the beginning to get rid of stale bits.
>>>> I thought kvm_set_ioapic is called only after save/restore or migration. And
> the
>>> ioapic should be reset successfully before call it. So the dest_map is empty
>>> before call rtc_irq_restore().
>>>> But it is possible kvm_set_ioapic is called beside save/restore or
>>>> migration. Right?
>>>> 
>>> First of all userspace should not care when it calls kvm_set_ioapic()
>>> the kernel need to do the right thing. Second, believe it or not,
>>> kvm_ioapic_reset() is not called during system reset. Instead userspace
>>> reset it by calling kvm_set_ioapic() with ioapic state after reset.
>> Ok. I see. As the logic you suggested, it will clear dest_map if no
>> pending eoi in vcpu, so we don't need to do it again.
>> 
> You again rely on userspace doing thing in certain manner. What is
> set_lapic() is never called? Kernel internal state have to be correct
> after each ioctl call.
Sorry. I cannot figure out what's the problem if don't clear dest_map? Can you elaborate it?

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov April 7, 2013, 1:08 p.m. UTC | #7
On Sun, Apr 07, 2013 at 01:05:02PM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-04-07:
> > On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-04-07:
> >>> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2013-04-04:
> >>>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
> >>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>> 
> >>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>> ---
> >>>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h |    2
> >>>>>>  ++ virt/kvm/ioapic.c    |   43
> >>>>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h | 1
> >>>>>>  + 4 files changed, 55 insertions(+), 0 deletions(-)
> >>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >>>>>> index 96ab160..9c041fa 100644
> >>>>>> --- a/arch/x86/kvm/lapic.c
> >>>>>> +++ b/arch/x86/kvm/lapic.c
> >>>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void
> > *bitmap)
> >>>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>>>>  }
> >>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
> >>>>>> +{
> >>>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
> >>>>>> +
> >>>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
> >>>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
> >>>>>> +}
> >>>>>> +
> >>>>>>  static inline void apic_set_vector(int vec, void *bitmap)
> >>>>>>  {
> >>>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
> >>> kvm_vcpu
> >>>>> *vcpu,
> >>>>>>  	apic->highest_isr_cache = -1;
> >>>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
> >>>>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
> >>>>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
> >>>>>>  
> >>>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> >>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >>>>>> index 967519c..004d2ad 100644
> >>>>>> --- a/arch/x86/kvm/lapic.h
> >>>>>> +++ b/arch/x86/kvm/lapic.h
> >>>>>> @@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct
> >>>>> kvm_vcpu *vcpu)
> >>>>>>  	return vcpu->arch.apic->pending_events;
> >>>>>>  }
> >>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
> >>>>>> +
> >>>>>>  #endif
> >>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >>>>>> index 8664812..0b12b17 100644
> >>>>>> --- a/virt/kvm/ioapic.c
> >>>>>> +++ b/virt/kvm/ioapic.c
> >>>>>> @@ -90,6 +90,47 @@ static unsigned long ioapic_read_indirect(struct
> >>>>> kvm_ioapic *ioapic,
> >>>>>>  	return result;
> >>>>>>  }
> >>>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
> >>>>>> +{
> >>>>>> +	ioapic->rtc_status.pending_eoi = 0;
> >>>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
> >>>>>> +{
> >>>>>> +	struct kvm_vcpu *vcpu;
> >>>>>> +	int vector, i, pending_eoi = 0;
> >>>>>> +
> >>>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
> >>>>>> +		return;
> >>>>>> +
> >>>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
> >>>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
> >>>>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
> >>>>>> +			pending_eoi++;
> >>>>>> +			__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> >>>>> You should cleat dest_map at the beginning to get rid of stale bits.
> >>>> I thought kvm_set_ioapic is called only after save/restore or migration. And
> > the
> >>> ioapic should be reset successfully before call it. So the dest_map is empty
> >>> before call rtc_irq_restore().
> >>>> But it is possible kvm_set_ioapic is called beside save/restore or
> >>>> migration. Right?
> >>>> 
> >>> First of all userspace should not care when it calls kvm_set_ioapic()
> >>> the kernel need to do the right thing. Second, believe it or not,
> >>> kvm_ioapic_reset() is not called during system reset. Instead userspace
> >>> reset it by calling kvm_set_ioapic() with ioapic state after reset.
> >> Ok. I see. As the logic you suggested, it will clear dest_map if no
> >> pending eoi in vcpu, so we don't need to do it again.
> >> 
> > You again rely on userspace doing thing in certain manner. What is
> > set_lapic() is never called? Kernel internal state have to be correct
> > after each ioctl call.
> Sorry. I cannot figure out what's the problem if don't clear dest_map? Can you elaborate it?
> 
What is not obvious about it? If there is a bit in dest_map that should
be cleared after rtc_irq_restore() it will not.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z April 7, 2013, 1:16 p.m. UTC | #8
Gleb Natapov wrote on 2013-04-07:
> On Sun, Apr 07, 2013 at 01:05:02PM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-04-07:
>>> On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2013-04-07:
>>>>> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
>>>>>> Gleb Natapov wrote on 2013-04-04:
>>>>>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
>>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>> 
>>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>> ---
>>>>>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h | 2
>>>>>>>>  ++ virt/kvm/ioapic.c    |   43
>>>>>>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h |
>>>>>>>>  1 + 4 files changed, 55 insertions(+), 0 deletions(-)
>>>>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>>>>>> index 96ab160..9c041fa 100644
>>>>>>>> --- a/arch/x86/kvm/lapic.c
>>>>>>>> +++ b/arch/x86/kvm/lapic.c
>>>>>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void
>>> *bitmap)
>>>>>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>>>>  }
>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
>>>>>>>> +{
>>>>>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
>>>>>>>> +
>>>>>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
>>>>>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  static inline void apic_set_vector(int vec, void *bitmap)
>>>>>>>>  {
>>>>>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
>>>>> kvm_vcpu
>>>>>>> *vcpu,
>>>>>>>>  	apic->highest_isr_cache = -1;
>>>>>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>>>>>>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
>>>>>>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
>>>>>>>>  
>>>>>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
>>>>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>>>>>> index 967519c..004d2ad 100644
>>>>>>>> --- a/arch/x86/kvm/lapic.h
>>>>>>>> +++ b/arch/x86/kvm/lapic.h
>>>>>>>> @@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct
>>>>>>> kvm_vcpu *vcpu)
>>>>>>>>  	return vcpu->arch.apic->pending_events;
>>>>>>>>  }
>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>>>>>>>> +
>>>>>>>>  #endif
>>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>>>>>> index 8664812..0b12b17 100644
>>>>>>>> --- a/virt/kvm/ioapic.c
>>>>>>>> +++ b/virt/kvm/ioapic.c
>>>>>>>> @@ -90,6 +90,47 @@ static unsigned long ioapic_read_indirect(struct
>>>>>>> kvm_ioapic *ioapic,
>>>>>>>>  	return result;
>>>>>>>>  }
>>>>>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
>>>>>>>> +{
>>>>>>>> +	ioapic->rtc_status.pending_eoi = 0;
>>>>>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
>>>>>>>> +{
>>>>>>>> +	struct kvm_vcpu *vcpu;
>>>>>>>> +	int vector, i, pending_eoi = 0;
>>>>>>>> +
>>>>>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
>>>>>>>> +		return;
>>>>>>>> +
>>>>>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
>>>>>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
>>>>>>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
>>>>>>>> +			pending_eoi++;
>>>>>>>> +			__set_bit(vcpu->vcpu_id,
> ioapic->rtc_status.dest_map);
>>>>>>> You should cleat dest_map at the beginning to get rid of stale bits.
>>>>>> I thought kvm_set_ioapic is called only after save/restore or migration.
> And
>>> the
>>>>> ioapic should be reset successfully before call it. So the dest_map is empty
>>>>> before call rtc_irq_restore().
>>>>>> But it is possible kvm_set_ioapic is called beside save/restore or
>>>>>> migration. Right?
>>>>>> 
>>>>> First of all userspace should not care when it calls kvm_set_ioapic()
>>>>> the kernel need to do the right thing. Second, believe it or not,
>>>>> kvm_ioapic_reset() is not called during system reset. Instead userspace
>>>>> reset it by calling kvm_set_ioapic() with ioapic state after reset.
>>>> Ok. I see. As the logic you suggested, it will clear dest_map if no
>>>> pending eoi in vcpu, so we don't need to do it again.
>>>> 
>>> You again rely on userspace doing thing in certain manner. What is
>>> set_lapic() is never called? Kernel internal state have to be correct
>>> after each ioctl call.
>> Sorry. I cannot figure out what's the problem if don't clear dest_map?
>> Can you elaborate it?
>> 
> What is not obvious about it? If there is a bit in dest_map that should
> be cleared after rtc_irq_restore() it will not.
Why it will not? If new_val is false, and the old_val is true. __clear_bit() will clear the dest_map. Am I wrong?

new_val = kvm_apic_pending_eoi(vcpu, vector);
old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);

if (new_val == old_val)
        return;

if (new_val) {
        __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
        ioapic->rtc_status.pending_eoi++;
} else {
        __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
        ioapic->rtc_status.pending_eoi--;
}

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov April 7, 2013, 1:32 p.m. UTC | #9
On Sun, Apr 07, 2013 at 01:16:51PM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-04-07:
> > On Sun, Apr 07, 2013 at 01:05:02PM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-04-07:
> >>> On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2013-04-07:
> >>>>> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
> >>>>>> Gleb Natapov wrote on 2013-04-04:
> >>>>>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
> >>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>> 
> >>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>> ---
> >>>>>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h | 2
> >>>>>>>>  ++ virt/kvm/ioapic.c    |   43
> >>>>>>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h |
> >>>>>>>>  1 + 4 files changed, 55 insertions(+), 0 deletions(-)
> >>>>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >>>>>>>> index 96ab160..9c041fa 100644
> >>>>>>>> --- a/arch/x86/kvm/lapic.c
> >>>>>>>> +++ b/arch/x86/kvm/lapic.c
> >>>>>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void
> >>> *bitmap)
> >>>>>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>>>>>>  }
> >>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
> >>>>>>>> +{
> >>>>>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
> >>>>>>>> +
> >>>>>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
> >>>>>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>  static inline void apic_set_vector(int vec, void *bitmap)
> >>>>>>>>  {
> >>>>>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>>>>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
> >>>>> kvm_vcpu
> >>>>>>> *vcpu,
> >>>>>>>>  	apic->highest_isr_cache = -1;
> >>>>>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
> >>>>>>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
> >>>>>>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
> >>>>>>>>  
> >>>>>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> >>>>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >>>>>>>> index 967519c..004d2ad 100644
> >>>>>>>> --- a/arch/x86/kvm/lapic.h
> >>>>>>>> +++ b/arch/x86/kvm/lapic.h
> >>>>>>>> @@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct
> >>>>>>> kvm_vcpu *vcpu)
> >>>>>>>>  	return vcpu->arch.apic->pending_events;
> >>>>>>>>  }
> >>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
> >>>>>>>> +
> >>>>>>>>  #endif
> >>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >>>>>>>> index 8664812..0b12b17 100644
> >>>>>>>> --- a/virt/kvm/ioapic.c
> >>>>>>>> +++ b/virt/kvm/ioapic.c
> >>>>>>>> @@ -90,6 +90,47 @@ static unsigned long ioapic_read_indirect(struct
> >>>>>>> kvm_ioapic *ioapic,
> >>>>>>>>  	return result;
> >>>>>>>>  }
> >>>>>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
> >>>>>>>> +{
> >>>>>>>> +	ioapic->rtc_status.pending_eoi = 0;
> >>>>>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
> >>>>>>>> +{
> >>>>>>>> +	struct kvm_vcpu *vcpu;
> >>>>>>>> +	int vector, i, pending_eoi = 0;
> >>>>>>>> +
> >>>>>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
> >>>>>>>> +		return;
> >>>>>>>> +
> >>>>>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
> >>>>>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
> >>>>>>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
> >>>>>>>> +			pending_eoi++;
> >>>>>>>> +			__set_bit(vcpu->vcpu_id,
> > ioapic->rtc_status.dest_map);
> >>>>>>> You should cleat dest_map at the beginning to get rid of stale bits.
> >>>>>> I thought kvm_set_ioapic is called only after save/restore or migration.
> > And
> >>> the
> >>>>> ioapic should be reset successfully before call it. So the dest_map is empty
> >>>>> before call rtc_irq_restore().
> >>>>>> But it is possible kvm_set_ioapic is called beside save/restore or
> >>>>>> migration. Right?
> >>>>>> 
> >>>>> First of all userspace should not care when it calls kvm_set_ioapic()
> >>>>> the kernel need to do the right thing. Second, believe it or not,
> >>>>> kvm_ioapic_reset() is not called during system reset. Instead userspace
> >>>>> reset it by calling kvm_set_ioapic() with ioapic state after reset.
> >>>> Ok. I see. As the logic you suggested, it will clear dest_map if no
> >>>> pending eoi in vcpu, so we don't need to do it again.
> >>>> 
> >>> You again rely on userspace doing thing in certain manner. What is
> >>> set_lapic() is never called? Kernel internal state have to be correct
> >>> after each ioctl call.
> >> Sorry. I cannot figure out what's the problem if don't clear dest_map?
> >> Can you elaborate it?
> >> 
> > What is not obvious about it? If there is a bit in dest_map that should
> > be cleared after rtc_irq_restore() it will not.
> Why it will not? If new_val is false, and the old_val is true. __clear_bit() will clear the dest_map. Am I wrong?
> 
Ah, yes with new logic since we go over all vcpus and calculate new
value for each one in theory it should be fine, but if we add cpu
destruction this will be no longer true.

> new_val = kvm_apic_pending_eoi(vcpu, vector);
Which reminds me there are more bugs in the current code. It is not
enough to call kvm_apic_pending_eoi() to check the new value. You need to
see if the entry is masked and vcpu is the destination of the interrupt too.
 
> old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> 
> if (new_val == old_val)
>         return;
> 
> if (new_val) {
>         __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
>         ioapic->rtc_status.pending_eoi++;
> } else {
>         __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
>         ioapic->rtc_status.pending_eoi--;
> }
> 
> Best regards,
> Yang
> 

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z April 7, 2013, 1:46 p.m. UTC | #10
Gleb Natapov wrote on 2013-04-07:
> On Sun, Apr 07, 2013 at 01:16:51PM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-04-07:
>>> On Sun, Apr 07, 2013 at 01:05:02PM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2013-04-07:
>>>>> On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
>>>>>> Gleb Natapov wrote on 2013-04-07:
>>>>>>> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
>>>>>>>> Gleb Natapov wrote on 2013-04-04:
>>>>>>>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
>>>>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>>>> 
>>>>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>>>> ---
>>>>>>>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h | 2
>>>>>>>>>>  ++ virt/kvm/ioapic.c    |   43
>>>>>>>>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h
>>>>>>>>>>  | 1 + 4 files changed, 55 insertions(+), 0 deletions(-)
>>>>>>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>>>>>>>> index 96ab160..9c041fa 100644
>>>>>>>>>> --- a/arch/x86/kvm/lapic.c
>>>>>>>>>> +++ b/arch/x86/kvm/lapic.c
>>>>>>>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void
>>>>> *bitmap)
>>>>>>>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>>>>>>  }
>>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
>>>>>>>>>> +{
>>>>>>>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
>>>>>>>>>> +
>>>>>>>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
>>>>>>>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  static inline void apic_set_vector(int vec, void *bitmap)
>>>>>>>>>>  {
>>>>>>>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>>>>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
>>>>>>> kvm_vcpu
>>>>>>>>> *vcpu,
>>>>>>>>>>  	apic->highest_isr_cache = -1;
>>>>>>>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>>>>>>>>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
>>>>>>>>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
>>>>>>>>>>  
>>>>>>>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
>>>>>>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>>>>>>>> index 967519c..004d2ad 100644
>>>>>>>>>> --- a/arch/x86/kvm/lapic.h
>>>>>>>>>> +++ b/arch/x86/kvm/lapic.h
>>>>>>>>>> @@ -170,4 +170,6 @@ static inline bool
> kvm_apic_has_events(struct
>>>>>>>>> kvm_vcpu *vcpu)
>>>>>>>>>>  	return vcpu->arch.apic->pending_events;
>>>>>>>>>>  }
>>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>>>>>>>>>> +
>>>>>>>>>>  #endif
>>>>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>>>>>>>> index 8664812..0b12b17 100644
>>>>>>>>>> --- a/virt/kvm/ioapic.c
>>>>>>>>>> +++ b/virt/kvm/ioapic.c
>>>>>>>>>> @@ -90,6 +90,47 @@ static unsigned long
> ioapic_read_indirect(struct
>>>>>>>>> kvm_ioapic *ioapic,
>>>>>>>>>>  	return result;
>>>>>>>>>>  }
>>>>>>>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
>>>>>>>>>> +{
>>>>>>>>>> +	ioapic->rtc_status.pending_eoi = 0;
>>>>>>>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
>>>>>>>>>> +{
>>>>>>>>>> +	struct kvm_vcpu *vcpu;
>>>>>>>>>> +	int vector, i, pending_eoi = 0;
>>>>>>>>>> +
>>>>>>>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
>>>>>>>>>> +		return;
>>>>>>>>>> +
>>>>>>>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
>>>>>>>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
>>>>>>>>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
>>>>>>>>>> +			pending_eoi++;
>>>>>>>>>> +			__set_bit(vcpu->vcpu_id,
>>> ioapic->rtc_status.dest_map);
>>>>>>>>> You should cleat dest_map at the beginning to get rid of stale bits.
>>>>>>>> I thought kvm_set_ioapic is called only after save/restore or migration.
>>> And
>>>>> the
>>>>>>> ioapic should be reset successfully before call it. So the
>>>>>>> dest_map is empty before call rtc_irq_restore().
>>>>>>>> But it is possible kvm_set_ioapic is called beside save/restore or
>>>>>>>> migration. Right?
>>>>>>>> 
>>>>>>> First of all userspace should not care when it calls kvm_set_ioapic()
>>>>>>> the kernel need to do the right thing. Second, believe it or not,
>>>>>>> kvm_ioapic_reset() is not called during system reset. Instead userspace
>>>>>>> reset it by calling kvm_set_ioapic() with ioapic state after reset.
>>>>>> Ok. I see. As the logic you suggested, it will clear dest_map if no
>>>>>> pending eoi in vcpu, so we don't need to do it again.
>>>>>> 
>>>>> You again rely on userspace doing thing in certain manner. What is
>>>>> set_lapic() is never called? Kernel internal state have to be correct
>>>>> after each ioctl call.
>>>> Sorry. I cannot figure out what's the problem if don't clear dest_map?
>>>> Can you elaborate it?
>>>> 
>>> What is not obvious about it? If there is a bit in dest_map that should
>>> be cleared after rtc_irq_restore() it will not.
>> Why it will not? If new_val is false, and the old_val is true.
>> __clear_bit() will clear the dest_map. Am I wrong?
>> 
> Ah, yes with new logic since we go over all vcpus and calculate new
> value for each one in theory it should be fine, but if we add cpu
> destruction this will be no longer true.
Yes. Given cpu destruction, it is wrong. Do you think we should clear dest_map now or delay it until cpu destruction is ready?

> 
>> new_val = kvm_apic_pending_eoi(vcpu, vector);
> Which reminds me there are more bugs in the current code. It is not
> enough to call kvm_apic_pending_eoi() to check the new value. You need to
> see if the entry is masked and vcpu is the destination of the interrupt too.
Yes, you are right.

> 
>> old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
>> 
>> if (new_val == old_val)
>>         return;
>> if (new_val) {
>>         __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
>>         ioapic->rtc_status.pending_eoi++; } else {
>>         __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
>>         ioapic->rtc_status.pending_eoi--;
>> }
>> 
>> Best regards,
>> Yang
>> 
> 
> --
> 			Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov April 7, 2013, 1:55 p.m. UTC | #11
On Sun, Apr 07, 2013 at 01:46:57PM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-04-07:
> > On Sun, Apr 07, 2013 at 01:16:51PM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-04-07:
> >>> On Sun, Apr 07, 2013 at 01:05:02PM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2013-04-07:
> >>>>> On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
> >>>>>> Gleb Natapov wrote on 2013-04-07:
> >>>>>>> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
> >>>>>>>> Gleb Natapov wrote on 2013-04-04:
> >>>>>>>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
> >>>>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>>>> 
> >>>>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>>>> ---
> >>>>>>>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h | 2
> >>>>>>>>>>  ++ virt/kvm/ioapic.c    |   43
> >>>>>>>>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h
> >>>>>>>>>>  | 1 + 4 files changed, 55 insertions(+), 0 deletions(-)
> >>>>>>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >>>>>>>>>> index 96ab160..9c041fa 100644
> >>>>>>>>>> --- a/arch/x86/kvm/lapic.c
> >>>>>>>>>> +++ b/arch/x86/kvm/lapic.c
> >>>>>>>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void
> >>>>> *bitmap)
> >>>>>>>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>>>>>>>>  }
> >>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
> >>>>>>>>>> +{
> >>>>>>>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
> >>>>>>>>>> +
> >>>>>>>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
> >>>>>>>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
> >>>>>>>>>> +}
> >>>>>>>>>> +
> >>>>>>>>>>  static inline void apic_set_vector(int vec, void *bitmap)
> >>>>>>>>>>  {
> >>>>>>>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>>>>>>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
> >>>>>>> kvm_vcpu
> >>>>>>>>> *vcpu,
> >>>>>>>>>>  	apic->highest_isr_cache = -1;
> >>>>>>>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
> >>>>>>>>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
> >>>>>>>>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
> >>>>>>>>>>  
> >>>>>>>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> >>>>>>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >>>>>>>>>> index 967519c..004d2ad 100644
> >>>>>>>>>> --- a/arch/x86/kvm/lapic.h
> >>>>>>>>>> +++ b/arch/x86/kvm/lapic.h
> >>>>>>>>>> @@ -170,4 +170,6 @@ static inline bool
> > kvm_apic_has_events(struct
> >>>>>>>>> kvm_vcpu *vcpu)
> >>>>>>>>>>  	return vcpu->arch.apic->pending_events;
> >>>>>>>>>>  }
> >>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
> >>>>>>>>>> +
> >>>>>>>>>>  #endif
> >>>>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >>>>>>>>>> index 8664812..0b12b17 100644
> >>>>>>>>>> --- a/virt/kvm/ioapic.c
> >>>>>>>>>> +++ b/virt/kvm/ioapic.c
> >>>>>>>>>> @@ -90,6 +90,47 @@ static unsigned long
> > ioapic_read_indirect(struct
> >>>>>>>>> kvm_ioapic *ioapic,
> >>>>>>>>>>  	return result;
> >>>>>>>>>>  }
> >>>>>>>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
> >>>>>>>>>> +{
> >>>>>>>>>> +	ioapic->rtc_status.pending_eoi = 0;
> >>>>>>>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
> >>>>>>>>>> +}
> >>>>>>>>>> +
> >>>>>>>>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
> >>>>>>>>>> +{
> >>>>>>>>>> +	struct kvm_vcpu *vcpu;
> >>>>>>>>>> +	int vector, i, pending_eoi = 0;
> >>>>>>>>>> +
> >>>>>>>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
> >>>>>>>>>> +		return;
> >>>>>>>>>> +
> >>>>>>>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
> >>>>>>>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
> >>>>>>>>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
> >>>>>>>>>> +			pending_eoi++;
> >>>>>>>>>> +			__set_bit(vcpu->vcpu_id,
> >>> ioapic->rtc_status.dest_map);
> >>>>>>>>> You should cleat dest_map at the beginning to get rid of stale bits.
> >>>>>>>> I thought kvm_set_ioapic is called only after save/restore or migration.
> >>> And
> >>>>> the
> >>>>>>> ioapic should be reset successfully before call it. So the
> >>>>>>> dest_map is empty before call rtc_irq_restore().
> >>>>>>>> But it is possible kvm_set_ioapic is called beside save/restore or
> >>>>>>>> migration. Right?
> >>>>>>>> 
> >>>>>>> First of all userspace should not care when it calls kvm_set_ioapic()
> >>>>>>> the kernel need to do the right thing. Second, believe it or not,
> >>>>>>> kvm_ioapic_reset() is not called during system reset. Instead userspace
> >>>>>>> reset it by calling kvm_set_ioapic() with ioapic state after reset.
> >>>>>> Ok. I see. As the logic you suggested, it will clear dest_map if no
> >>>>>> pending eoi in vcpu, so we don't need to do it again.
> >>>>>> 
> >>>>> You again rely on userspace doing thing in certain manner. What is
> >>>>> set_lapic() is never called? Kernel internal state have to be correct
> >>>>> after each ioctl call.
> >>>> Sorry. I cannot figure out what's the problem if don't clear dest_map?
> >>>> Can you elaborate it?
> >>>> 
> >>> What is not obvious about it? If there is a bit in dest_map that should
> >>> be cleared after rtc_irq_restore() it will not.
> >> Why it will not? If new_val is false, and the old_val is true.
> >> __clear_bit() will clear the dest_map. Am I wrong?
> >> 
> > Ah, yes with new logic since we go over all vcpus and calculate new
> > value for each one in theory it should be fine, but if we add cpu
> > destruction this will be no longer true.
> Yes. Given cpu destruction, it is wrong. Do you think we should clear dest_map now or delay it until cpu destruction is ready?
> 
We will surely forgot it at that point and this is slow path. Lets clear
it.

> > 
> >> new_val = kvm_apic_pending_eoi(vcpu, vector);
> > Which reminds me there are more bugs in the current code. It is not
> > enough to call kvm_apic_pending_eoi() to check the new value. You need to
> > see if the entry is masked and vcpu is the destination of the interrupt too.
> Yes, you are right.
> 
> > 
> >> old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> >> 
> >> if (new_val == old_val)
> >>         return;
> >> if (new_val) {
> >>         __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> >>         ioapic->rtc_status.pending_eoi++; } else {
> >>         __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
> >>         ioapic->rtc_status.pending_eoi--;
> >> }
> >> 
> >> Best regards,
> >> Yang
> >> 
> > 
> > --
> > 			Gleb.
> 
> 
> Best regards,
> Yang
> 

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z April 8, 2013, 11:21 a.m. UTC | #12
Gleb Natapov wrote on 2013-04-07:
> On Sun, Apr 07, 2013 at 01:16:51PM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-04-07:
>>> On Sun, Apr 07, 2013 at 01:05:02PM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2013-04-07:
>>>>> On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
>>>>>> Gleb Natapov wrote on 2013-04-07:
>>>>>>> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
>>>>>>>> Gleb Natapov wrote on 2013-04-04:
>>>>>>>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
>>>>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>>>> 
>>>>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>>>> ---
>>>>>>>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h | 2
>>>>>>>>>>  ++ virt/kvm/ioapic.c    |   43
>>>>>>>>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h
>>>>>>>>>>  | 1 + 4 files changed, 55 insertions(+), 0 deletions(-)
>>>>>>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>>>>>>>> index 96ab160..9c041fa 100644
>>>>>>>>>> --- a/arch/x86/kvm/lapic.c
>>>>>>>>>> +++ b/arch/x86/kvm/lapic.c
>>>>>>>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void
>>>>> *bitmap)
>>>>>>>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>>>>>>  }
>>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
>>>>>>>>>> +{
>>>>>>>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
>>>>>>>>>> +
>>>>>>>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
>>>>>>>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  static inline void apic_set_vector(int vec, void *bitmap)
>>>>>>>>>>  {
>>>>>>>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>>>>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
>>>>>>> kvm_vcpu
>>>>>>>>> *vcpu,
>>>>>>>>>>  	apic->highest_isr_cache = -1;
>>>>>>>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>>>>>>>>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
>>>>>>>>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
>>>>>>>>>>  
>>>>>>>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
>>>>>>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>>>>>>>> index 967519c..004d2ad 100644
>>>>>>>>>> --- a/arch/x86/kvm/lapic.h
>>>>>>>>>> +++ b/arch/x86/kvm/lapic.h
>>>>>>>>>> @@ -170,4 +170,6 @@ static inline bool
> kvm_apic_has_events(struct
>>>>>>>>> kvm_vcpu *vcpu)
>>>>>>>>>>  	return vcpu->arch.apic->pending_events;
>>>>>>>>>>  }
>>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>>>>>>>>>> +
>>>>>>>>>>  #endif
>>>>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>>>>>>>> index 8664812..0b12b17 100644
>>>>>>>>>> --- a/virt/kvm/ioapic.c
>>>>>>>>>> +++ b/virt/kvm/ioapic.c
>>>>>>>>>> @@ -90,6 +90,47 @@ static unsigned long
> ioapic_read_indirect(struct
>>>>>>>>> kvm_ioapic *ioapic,
>>>>>>>>>>  	return result;
>>>>>>>>>>  }
>>>>>>>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
>>>>>>>>>> +{
>>>>>>>>>> +	ioapic->rtc_status.pending_eoi = 0;
>>>>>>>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
>>>>>>>>>> +{
>>>>>>>>>> +	struct kvm_vcpu *vcpu;
>>>>>>>>>> +	int vector, i, pending_eoi = 0;
>>>>>>>>>> +
>>>>>>>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
>>>>>>>>>> +		return;
>>>>>>>>>> +
>>>>>>>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
>>>>>>>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
>>>>>>>>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
>>>>>>>>>> +			pending_eoi++;
>>>>>>>>>> +			__set_bit(vcpu->vcpu_id,
>>> ioapic->rtc_status.dest_map);
>>>>>>>>> You should cleat dest_map at the beginning to get rid of stale bits.
>>>>>>>> I thought kvm_set_ioapic is called only after save/restore or migration.
>>> And
>>>>> the
>>>>>>> ioapic should be reset successfully before call it. So the
>>>>>>> dest_map is empty before call rtc_irq_restore().
>>>>>>>> But it is possible kvm_set_ioapic is called beside save/restore or
>>>>>>>> migration. Right?
>>>>>>>> 
>>>>>>> First of all userspace should not care when it calls kvm_set_ioapic()
>>>>>>> the kernel need to do the right thing. Second, believe it or not,
>>>>>>> kvm_ioapic_reset() is not called during system reset. Instead userspace
>>>>>>> reset it by calling kvm_set_ioapic() with ioapic state after reset.
>>>>>> Ok. I see. As the logic you suggested, it will clear dest_map if no
>>>>>> pending eoi in vcpu, so we don't need to do it again.
>>>>>> 
>>>>> You again rely on userspace doing thing in certain manner. What is
>>>>> set_lapic() is never called? Kernel internal state have to be correct
>>>>> after each ioctl call.
>>>> Sorry. I cannot figure out what's the problem if don't clear dest_map?
>>>> Can you elaborate it?
>>>> 
>>> What is not obvious about it? If there is a bit in dest_map that should
>>> be cleared after rtc_irq_restore() it will not.
>> Why it will not? If new_val is false, and the old_val is true.
>> __clear_bit() will clear the dest_map. Am I wrong?
>> 
> Ah, yes with new logic since we go over all vcpus and calculate new
> value for each one in theory it should be fine, but if we add cpu
> destruction this will be no longer true.
> 
>> new_val = kvm_apic_pending_eoi(vcpu, vector);
> Which reminds me there are more bugs in the current code. It is not
> enough to call kvm_apic_pending_eoi() to check the new value. You need to
> see if the entry is masked and vcpu is the destination of the interrupt too.
No. kvm_apic_pending_eoi() is the right way. IOAPIC entry may change after vcpu received it before issue EOI, and we should not rely on the entry.
For example:
vcpu A received the interrupt, pending in IRR
mask entry
migration happened

The only problem is we may account the interrupt from non-IOAPIC(from IPI) into RTC interrupt. But it's ok, we will clear pending_eoi in EOI regardless of source.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov April 8, 2013, 12:45 p.m. UTC | #13
On Mon, Apr 08, 2013 at 11:21:34AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-04-07:
> > On Sun, Apr 07, 2013 at 01:16:51PM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-04-07:
> >>> On Sun, Apr 07, 2013 at 01:05:02PM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2013-04-07:
> >>>>> On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
> >>>>>> Gleb Natapov wrote on 2013-04-07:
> >>>>>>> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
> >>>>>>>> Gleb Natapov wrote on 2013-04-04:
> >>>>>>>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
> >>>>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>>>> 
> >>>>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>>>> ---
> >>>>>>>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h | 2
> >>>>>>>>>>  ++ virt/kvm/ioapic.c    |   43
> >>>>>>>>>>  +++++++++++++++++++++++++++++++++++++++++++ virt/kvm/ioapic.h
> >>>>>>>>>>  | 1 + 4 files changed, 55 insertions(+), 0 deletions(-)
> >>>>>>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >>>>>>>>>> index 96ab160..9c041fa 100644
> >>>>>>>>>> --- a/arch/x86/kvm/lapic.c
> >>>>>>>>>> +++ b/arch/x86/kvm/lapic.c
> >>>>>>>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void
> >>>>> *bitmap)
> >>>>>>>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>>>>>>>>  }
> >>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
> >>>>>>>>>> +{
> >>>>>>>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
> >>>>>>>>>> +
> >>>>>>>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
> >>>>>>>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
> >>>>>>>>>> +}
> >>>>>>>>>> +
> >>>>>>>>>>  static inline void apic_set_vector(int vec, void *bitmap)
> >>>>>>>>>>  {
> >>>>>>>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> >>>>>>>>>> @@ -1665,6 +1673,7 @@ void kvm_apic_post_state_restore(struct
> >>>>>>> kvm_vcpu
> >>>>>>>>> *vcpu,
> >>>>>>>>>>  	apic->highest_isr_cache = -1;
> >>>>>>>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
> >>>>>>>>>>  apic_find_highest_isr(apic)); 	kvm_make_request(KVM_REQ_EVENT,
> >>>>>>>>>>  vcpu); +	kvm_rtc_irq_restore(vcpu); }
> >>>>>>>>>>  
> >>>>>>>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> >>>>>>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >>>>>>>>>> index 967519c..004d2ad 100644
> >>>>>>>>>> --- a/arch/x86/kvm/lapic.h
> >>>>>>>>>> +++ b/arch/x86/kvm/lapic.h
> >>>>>>>>>> @@ -170,4 +170,6 @@ static inline bool
> > kvm_apic_has_events(struct
> >>>>>>>>> kvm_vcpu *vcpu)
> >>>>>>>>>>  	return vcpu->arch.apic->pending_events;
> >>>>>>>>>>  }
> >>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
> >>>>>>>>>> +
> >>>>>>>>>>  #endif
> >>>>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >>>>>>>>>> index 8664812..0b12b17 100644
> >>>>>>>>>> --- a/virt/kvm/ioapic.c
> >>>>>>>>>> +++ b/virt/kvm/ioapic.c
> >>>>>>>>>> @@ -90,6 +90,47 @@ static unsigned long
> > ioapic_read_indirect(struct
> >>>>>>>>> kvm_ioapic *ioapic,
> >>>>>>>>>>  	return result;
> >>>>>>>>>>  }
> >>>>>>>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic)
> >>>>>>>>>> +{
> >>>>>>>>>> +	ioapic->rtc_status.pending_eoi = 0;
> >>>>>>>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
> >>>>>>>>>> +}
> >>>>>>>>>> +
> >>>>>>>>>> +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
> >>>>>>>>>> +{
> >>>>>>>>>> +	struct kvm_vcpu *vcpu;
> >>>>>>>>>> +	int vector, i, pending_eoi = 0;
> >>>>>>>>>> +
> >>>>>>>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS)
> >>>>>>>>>> +		return;
> >>>>>>>>>> +
> >>>>>>>>>> +	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
> >>>>>>>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
> >>>>>>>>>> +		if (kvm_apic_pending_eoi(vcpu, vector)) {
> >>>>>>>>>> +			pending_eoi++;
> >>>>>>>>>> +			__set_bit(vcpu->vcpu_id,
> >>> ioapic->rtc_status.dest_map);
> >>>>>>>>> You should cleat dest_map at the beginning to get rid of stale bits.
> >>>>>>>> I thought kvm_set_ioapic is called only after save/restore or migration.
> >>> And
> >>>>> the
> >>>>>>> ioapic should be reset successfully before call it. So the
> >>>>>>> dest_map is empty before call rtc_irq_restore().
> >>>>>>>> But it is possible kvm_set_ioapic is called beside save/restore or
> >>>>>>>> migration. Right?
> >>>>>>>> 
> >>>>>>> First of all userspace should not care when it calls kvm_set_ioapic()
> >>>>>>> the kernel need to do the right thing. Second, believe it or not,
> >>>>>>> kvm_ioapic_reset() is not called during system reset. Instead userspace
> >>>>>>> reset it by calling kvm_set_ioapic() with ioapic state after reset.
> >>>>>> Ok. I see. As the logic you suggested, it will clear dest_map if no
> >>>>>> pending eoi in vcpu, so we don't need to do it again.
> >>>>>> 
> >>>>> You again rely on userspace doing thing in certain manner. What is
> >>>>> set_lapic() is never called? Kernel internal state have to be correct
> >>>>> after each ioctl call.
> >>>> Sorry. I cannot figure out what's the problem if don't clear dest_map?
> >>>> Can you elaborate it?
> >>>> 
> >>> What is not obvious about it? If there is a bit in dest_map that should
> >>> be cleared after rtc_irq_restore() it will not.
> >> Why it will not? If new_val is false, and the old_val is true.
> >> __clear_bit() will clear the dest_map. Am I wrong?
> >> 
> > Ah, yes with new logic since we go over all vcpus and calculate new
> > value for each one in theory it should be fine, but if we add cpu
> > destruction this will be no longer true.
> > 
> >> new_val = kvm_apic_pending_eoi(vcpu, vector);
> > Which reminds me there are more bugs in the current code. It is not
> > enough to call kvm_apic_pending_eoi() to check the new value. You need to
> > see if the entry is masked and vcpu is the destination of the interrupt too.
> No. kvm_apic_pending_eoi() is the right way. IOAPIC entry may change after vcpu received it before issue EOI, and we should not rely on the entry.
> For example:
> vcpu A received the interrupt, pending in IRR
> mask entry
> migration happened
> 
> The only problem is we may account the interrupt from non-IOAPIC(from IPI) into RTC interrupt. But it's ok, we will clear pending_eoi in EOI regardless of source.
> 
With apicv this EOI may never come. We have to at least check that the vcpu
is a destination for the interrupt.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z April 8, 2013, 1:12 p.m. UTC | #14
Gleb Natapov wrote on 2013-04-08:
> On Mon, Apr 08, 2013 at 11:21:34AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-04-07:
>>> On Sun, Apr 07, 2013 at 01:16:51PM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2013-04-07:
>>>>> On Sun, Apr 07, 2013 at 01:05:02PM +0000, Zhang, Yang Z wrote:
>>>>>> Gleb Natapov wrote on 2013-04-07:
>>>>>>> On Sun, Apr 07, 2013 at 12:39:32PM +0000, Zhang, Yang Z wrote:
>>>>>>>> Gleb Natapov wrote on 2013-04-07:
>>>>>>>>> On Sun, Apr 07, 2013 at 02:30:15AM +0000, Zhang, Yang Z wrote:
>>>>>>>>>> Gleb Natapov wrote on 2013-04-04:
>>>>>>>>>>> On Mon, Apr 01, 2013 at 08:40:13AM +0800, Yang Zhang wrote:
>>>>>>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>>>>>> 
>>>>>>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>  arch/x86/kvm/lapic.c |    9 +++++++++ arch/x86/kvm/lapic.h |
>>>>>>>>>>>>  2 ++ virt/kvm/ioapic.c    |   43
>>>>>>>>>>>>  +++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>>>>>  virt/kvm/ioapic.h | 1 + 4 files changed, 55 insertions(+), 0
>>>>>>>>>>>>  deletions(-)
>>>>>>>>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>>>>>>>>>> index 96ab160..9c041fa 100644
>>>>>>>>>>>> --- a/arch/x86/kvm/lapic.c
>>>>>>>>>>>> +++ b/arch/x86/kvm/lapic.c
>>>>>>>>>>>> @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec,
> void
>>>>>>> *bitmap)
>>>>>>>>>>>>  	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>>>>>>>>  }
>>>>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
>>>>>>>>>>>> +		apic_test_vector(vector, apic->regs + APIC_IRR);
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>>  static inline void apic_set_vector(int vec, void *bitmap)
>>>>>>>>>>>>  {
>>>>>>>>>>>>  	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>>>>>>>>>>>> @@ -1665,6 +1673,7 @@ void
> kvm_apic_post_state_restore(struct
>>>>>>>>> kvm_vcpu
>>>>>>>>>>> *vcpu,
>>>>>>>>>>>>  	apic->highest_isr_cache = -1;
>>>>>>>>>>>>  	kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>>>>>>>>>>>>  apic_find_highest_isr(apic));
>>>>>>>>>>>>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>>>>>>>>>>>>  +	kvm_rtc_irq_restore(vcpu); }
>>>>>>>>>>>>  
>>>>>>>>>>>>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
>>>>>>>>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>>>>>>>>>> index 967519c..004d2ad 100644
>>>>>>>>>>>> --- a/arch/x86/kvm/lapic.h
>>>>>>>>>>>> +++ b/arch/x86/kvm/lapic.h
>>>>>>>>>>>> @@ -170,4 +170,6 @@ static inline bool
>>> kvm_apic_has_events(struct
>>>>>>>>>>> kvm_vcpu *vcpu)
>>>>>>>>>>>>  	return vcpu->arch.apic->pending_events;
>>>>>>>>>>>>  }
>>>>>>>>>>>> +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>>>>>>>>>>>> +
>>>>>>>>>>>>  #endif
>>>>>>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>>>>>>>>>> index 8664812..0b12b17 100644
>>>>>>>>>>>> --- a/virt/kvm/ioapic.c
>>>>>>>>>>>> +++ b/virt/kvm/ioapic.c
>>>>>>>>>>>> @@ -90,6 +90,47 @@ static unsigned long
>>> ioapic_read_indirect(struct
>>>>>>>>>>> kvm_ioapic *ioapic,
>>>>>>>>>>>>  	return result;
>>>>>>>>>>>>  }
>>>>>>>>>>>> +static void rtc_irq_reset(struct kvm_ioapic *ioapic) +{
>>>>>>>>>>>> +	ioapic->rtc_status.pending_eoi = 0;
>>>>>>>>>>>> +	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); +}
>>>>>>>>>>>> + +static void rtc_irq_restore(struct kvm_ioapic *ioapic) +{
>>>>>>>>>>>> +	struct kvm_vcpu *vcpu; +	int vector, i, pending_eoi = 0; +
>>>>>>>>>>>> +	if (RTC_GSI >= IOAPIC_NUM_PINS) +		return; + +	vector =
>>>>>>>>>>>> ioapic->redirtbl[RTC_GSI].fields.vector;
>>>>>>>>>>>> +	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) { +		if
>>>>>>>>>>>> (kvm_apic_pending_eoi(vcpu, vector)) { +			pending_eoi++;
>>>>>>>>>>>> +			__set_bit(vcpu->vcpu_id,
>>>>> ioapic->rtc_status.dest_map);
>>>>>>>>>>> You should cleat dest_map at the beginning to get rid of stale bits.
>>>>>>>>>> I thought kvm_set_ioapic is called only after save/restore or
> migration.
>>>>> And
>>>>>>> the
>>>>>>>>> ioapic should be reset successfully before call it. So the
>>>>>>>>> dest_map is empty before call rtc_irq_restore().
>>>>>>>>>> But it is possible kvm_set_ioapic is called beside save/restore or
>>>>>>>>>> migration. Right?
>>>>>>>>>> 
>>>>>>>>> First of all userspace should not care when it calls
>>>>>>>>> kvm_set_ioapic() the kernel need to do the right thing. Second,
>>>>>>>>> believe it or not, kvm_ioapic_reset() is not called during
>>>>>>>>> system reset. Instead userspace reset it by calling
>>>>>>>>> kvm_set_ioapic() with ioapic state after reset.
>>>>>>>> Ok. I see. As the logic you suggested, it will clear dest_map if no
>>>>>>>> pending eoi in vcpu, so we don't need to do it again.
>>>>>>>> 
>>>>>>> You again rely on userspace doing thing in certain manner. What is
>>>>>>> set_lapic() is never called? Kernel internal state have to be correct
>>>>>>> after each ioctl call.
>>>>>> Sorry. I cannot figure out what's the problem if don't clear dest_map?
>>>>>> Can you elaborate it?
>>>>>> 
>>>>> What is not obvious about it? If there is a bit in dest_map that should
>>>>> be cleared after rtc_irq_restore() it will not.
>>>> Why it will not? If new_val is false, and the old_val is true.
>>>> __clear_bit() will clear the dest_map. Am I wrong?
>>>> 
>>> Ah, yes with new logic since we go over all vcpus and calculate new
>>> value for each one in theory it should be fine, but if we add cpu
>>> destruction this will be no longer true.
>>> 
>>>> new_val = kvm_apic_pending_eoi(vcpu, vector);
>>> Which reminds me there are more bugs in the current code. It is not
>>> enough to call kvm_apic_pending_eoi() to check the new value. You need to
>>> see if the entry is masked and vcpu is the destination of the interrupt too.
>> No. kvm_apic_pending_eoi() is the right way. IOAPIC entry may change
>> after vcpu received it before issue EOI, and we should not rely on the
>> entry. For example: vcpu A received the interrupt, pending in IRR mask
>> entry migration happened
>> 
>> The only problem is we may account the interrupt from non-IOAPIC(from IPI)
> into RTC interrupt. But it's ok, we will clear pending_eoi in EOI regardless of
> source.
>> 
> With apicv this EOI may never come. We have to at least check that the vcpu
> is a destination for the interrupt.
You are right. This is necessary/

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 96ab160..9c041fa 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -94,6 +94,14 @@  static inline int apic_test_vector(int vec, void *bitmap)
 	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
+		apic_test_vector(vector, apic->regs + APIC_IRR);
+}
+
 static inline void apic_set_vector(int vec, void *bitmap)
 {
 	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -1665,6 +1673,7 @@  void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	apic->highest_isr_cache = -1;
 	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	kvm_rtc_irq_restore(vcpu);
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 967519c..004d2ad 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -170,4 +170,6 @@  static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
 	return vcpu->arch.apic->pending_events;
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
+
 #endif
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 8664812..0b12b17 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -90,6 +90,47 @@  static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 	return result;
 }
 
+static void rtc_irq_reset(struct kvm_ioapic *ioapic)
+{
+	ioapic->rtc_status.pending_eoi = 0;
+	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
+}
+
+static void rtc_irq_restore(struct kvm_ioapic *ioapic)
+{
+	struct kvm_vcpu *vcpu;
+	int vector, i, pending_eoi = 0;
+
+	if (RTC_GSI >= IOAPIC_NUM_PINS)
+		return;
+
+	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
+	kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
+		if (kvm_apic_pending_eoi(vcpu, vector)) {
+			pending_eoi++;
+			__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+		}
+	}
+	ioapic->rtc_status.pending_eoi = pending_eoi;
+}
+
+void kvm_rtc_irq_restore(struct kvm_vcpu *vcpu)
+{
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+	int vector;
+
+	if (!ioapic)
+		return;
+
+	spin_lock(&ioapic->lock);
+	vector = ioapic->redirtbl[RTC_GSI].fields.vector;
+	if (kvm_apic_pending_eoi(vcpu, vector)) {
+		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi++;
+	}
+	spin_unlock(&ioapic->lock);
+}
+
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
 	union kvm_ioapic_redirect_entry *pent;
@@ -431,6 +472,7 @@  void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 	ioapic->ioregsel = 0;
 	ioapic->irr = 0;
 	ioapic->id = 0;
+	rtc_irq_reset(ioapic);
 	update_handled_vectors(ioapic);
 }
 
@@ -496,6 +538,7 @@  int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
 	update_handled_vectors(ioapic);
+	rtc_irq_restore(ioapic);
 	kvm_ioapic_make_eoibitmap_request(kvm);
 	spin_unlock(&ioapic->lock);
 	return 0;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 761e5b5..2692873 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -79,6 +79,7 @@  static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 	return kvm->arch.vioapic;
 }
 
+void kvm_rtc_irq_restore(struct kvm_vcpu *vcpu);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);