diff mbox

[v6,6/6] KVM: Use eoi to track RTC interrupt delivery status

Message ID 1363929845-29005-7-git-send-email-yang.z.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhang, Yang Z March 22, 2013, 5:24 a.m. UTC
From: Yang Zhang <yang.z.zhang@Intel.com>

Current interrupt coalescing logci which only used by RTC has conflict
with Posted Interrupt.
This patch introduces a new mechinism to use eoi to track interrupt:
When delivering an interrupt to vcpu, the pending_eoi set to number of
vcpu that received the interrupt. And decrease it when each vcpu writing
eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
write eoi.

Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
---
 virt/kvm/ioapic.c |   40 +++++++++++++++++++++++++++++++++++++++-
 1 files changed, 39 insertions(+), 1 deletions(-)

Comments

Gleb Natapov March 22, 2013, 7:50 a.m. UTC | #1
On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
> From: Yang Zhang <yang.z.zhang@Intel.com>
> 
> Current interrupt coalescing logci which only used by RTC has conflict
> with Posted Interrupt.
> This patch introduces a new mechinism to use eoi to track interrupt:
> When delivering an interrupt to vcpu, the pending_eoi set to number of
> vcpu that received the interrupt. And decrease it when each vcpu writing
> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
> write eoi.
> 
> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> ---
>  virt/kvm/ioapic.c |   40 +++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 39 insertions(+), 1 deletions(-)
> 
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index c991e58..df16daf 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic *ioapic)
>  	ioapic->rtc_status.pending_eoi = pending_eoi;
>  }
>  
> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> +			struct rtc_status *rtc_status, int irq)
> +{
> +	if (irq != RTC_GSI)
> +		return;
> +
> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
> +		--rtc_status->pending_eoi;
> +
> +	WARN_ON(rtc_status->pending_eoi < 0);
> +}
> +
> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
> +{
> +	if (irq != RTC_GSI)
> +		return false;
> +
> +	if (ioapic->rtc_status.pending_eoi > 0)
> +		return true; /* coalesced */
> +
> +	return false;
> +}
> +
>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
>  {
>  	union kvm_ioapic_redirect_entry *pent;
> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
>  {
>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
>  	struct kvm_lapic_irq irqe;
> +	int ret;
>  
>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
>  		     "vector=%x trig_mode=%x\n",
> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
>  	irqe.level = 1;
>  	irqe.shorthand = 0;
>  
> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
> +	if (irq == RTC_GSI) {
> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> +				ioapic->rtc_status.dest_map);
> +		ioapic->rtc_status.pending_eoi = ret;
We should track status only if IRQ_STATUS ioctl was used to inject an
interrupt.

> +	} else
> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
> +
> +	return ret;
>  }
>  
>  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
> @@ -268,6 +299,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
>  		ret = 1;
>  	} else {
>  		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
> +
> +		if (rtc_irq_check(ioapic, irq)) {
> +			ret = 0; /* coalesced */
> +			goto out;
> +		}
>  		ioapic->irr |= mask;
>  		if ((edge && old_irr != ioapic->irr) ||
>  		    (!edge && !entry.fields.remote_irr))
> @@ -275,6 +311,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
>  		else
>  			ret = 0; /* report coalesced interrupt */
>  	}
> +out:
>  	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
>  	spin_unlock(&ioapic->lock);
>  
> @@ -302,6 +339,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
>  		if (ent->fields.vector != vector)
>  			continue;
>  
> +		rtc_irq_ack_eoi(vcpu, &ioapic->rtc_status, i);
>  		/*
>  		 * We are dropping lock while calling ack notifiers because ack
>  		 * notifier callbacks for assigned devices call into IOAPIC
> -- 
> 1.7.1

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 22, 2013, 8:05 a.m. UTC | #2
Gleb Natapov wrote on 2013-03-22:
> On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <yang.z.zhang@Intel.com>
>> 
>> Current interrupt coalescing logci which only used by RTC has conflict
>> with Posted Interrupt.
>> This patch introduces a new mechinism to use eoi to track interrupt:
>> When delivering an interrupt to vcpu, the pending_eoi set to number of
>> vcpu that received the interrupt. And decrease it when each vcpu writing
>> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
>> write eoi.
>> 
>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>> ---
>>  virt/kvm/ioapic.c |   40 +++++++++++++++++++++++++++++++++++++++-
>>  1 files changed, 39 insertions(+), 1 deletions(-)
>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>> index c991e58..df16daf 100644
>> --- a/virt/kvm/ioapic.c
>> +++ b/virt/kvm/ioapic.c
>> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic *ioapic)
>>  	ioapic->rtc_status.pending_eoi = pending_eoi;
>>  }
>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>> +			struct rtc_status *rtc_status, int irq)
>> +{
>> +	if (irq != RTC_GSI)
>> +		return;
>> +
>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
>> +		--rtc_status->pending_eoi;
>> +
>> +	WARN_ON(rtc_status->pending_eoi < 0);
>> +}
>> +
>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
>> +{
>> +	if (irq != RTC_GSI)
>> +		return false;
>> +
>> +	if (ioapic->rtc_status.pending_eoi > 0)
>> +		return true; /* coalesced */
>> +
>> +	return false;
>> +}
>> +
>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
>>  {
>>  	union kvm_ioapic_redirect_entry *pent;
>> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
> irq)
>>  {
>>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
>>  	struct kvm_lapic_irq irqe;
>> +	int ret;
>> 
>>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
>>  		     "vector=%x trig_mode=%x\n",
>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
> irq)
>>  	irqe.level = 1;
>>  	irqe.shorthand = 0;
>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
>> +	if (irq == RTC_GSI) {
>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>> +				ioapic->rtc_status.dest_map);
>> +		ioapic->rtc_status.pending_eoi = ret;
> We should track status only if IRQ_STATUS ioctl was used to inject an
> interrupt.
We already know RTC will use IRQ_STATUS ioctl. Why check it again?

>> +	} else
>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
>> +
>> +	return ret;
>>  }
>>  
>>  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
>> @@ -268,6 +299,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int
> irq, int irq_source_id,
>>  		ret = 1;
>>  	} else {
>>  		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
>> +
>> +		if (rtc_irq_check(ioapic, irq)) {
>> +			ret = 0; /* coalesced */
>> +			goto out;
>> +		}
>>  		ioapic->irr |= mask;
>>  		if ((edge && old_irr != ioapic->irr) ||
>>  		    (!edge && !entry.fields.remote_irr))
>> @@ -275,6 +311,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq,
> int irq_source_id,
>>  		else 			ret = 0; /* report coalesced interrupt */ 	} +out:
>>  	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
>>  	spin_unlock(&ioapic->lock);
>> @@ -302,6 +339,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu
> *vcpu,
>>  		if (ent->fields.vector != vector)
>>  			continue;
>> +		rtc_irq_ack_eoi(vcpu, &ioapic->rtc_status, i);
>>  		/*
>>  		 * We are dropping lock while calling ack notifiers because ack
>>  		 * notifier callbacks for assigned devices call into IOAPIC
>> --
>> 1.7.1
> 
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Best regards,
Yang

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 22, 2013, 8:14 a.m. UTC | #3
On Fri, Mar 22, 2013 at 08:05:27AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-03-22:
> > On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
> >> From: Yang Zhang <yang.z.zhang@Intel.com>
> >> 
> >> Current interrupt coalescing logci which only used by RTC has conflict
> >> with Posted Interrupt.
> >> This patch introduces a new mechinism to use eoi to track interrupt:
> >> When delivering an interrupt to vcpu, the pending_eoi set to number of
> >> vcpu that received the interrupt. And decrease it when each vcpu writing
> >> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
> >> write eoi.
> >> 
> >> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >> ---
> >>  virt/kvm/ioapic.c |   40 +++++++++++++++++++++++++++++++++++++++-
> >>  1 files changed, 39 insertions(+), 1 deletions(-)
> >> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >> index c991e58..df16daf 100644
> >> --- a/virt/kvm/ioapic.c
> >> +++ b/virt/kvm/ioapic.c
> >> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic *ioapic)
> >>  	ioapic->rtc_status.pending_eoi = pending_eoi;
> >>  }
> >> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> >> +			struct rtc_status *rtc_status, int irq)
> >> +{
> >> +	if (irq != RTC_GSI)
> >> +		return;
> >> +
> >> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
> >> +		--rtc_status->pending_eoi;
> >> +
> >> +	WARN_ON(rtc_status->pending_eoi < 0);
> >> +}
> >> +
> >> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
> >> +{
> >> +	if (irq != RTC_GSI)
> >> +		return false;
> >> +
> >> +	if (ioapic->rtc_status.pending_eoi > 0)
> >> +		return true; /* coalesced */
> >> +
> >> +	return false;
> >> +}
> >> +
> >>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
> >>  {
> >>  	union kvm_ioapic_redirect_entry *pent;
> >> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
> > irq)
> >>  {
> >>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
> >>  	struct kvm_lapic_irq irqe;
> >> +	int ret;
> >> 
> >>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
> >>  		     "vector=%x trig_mode=%x\n",
> >> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
> > irq)
> >>  	irqe.level = 1;
> >>  	irqe.shorthand = 0;
> >> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
> >> +	if (irq == RTC_GSI) {
> >> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> >> +				ioapic->rtc_status.dest_map);
> >> +		ioapic->rtc_status.pending_eoi = ret;
> > We should track status only if IRQ_STATUS ioctl was used to inject an
> > interrupt.
> We already know RTC will use IRQ_STATUS ioctl. Why check it again?
> 
QEMU does. QEMU is not the only userspace.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 22, 2013, 8:25 a.m. UTC | #4
Gleb Natapov wrote on 2013-03-22:
> On Fri, Mar 22, 2013 at 08:05:27AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-03-22:
>>> On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>> 
>>>> Current interrupt coalescing logci which only used by RTC has conflict
>>>> with Posted Interrupt.
>>>> This patch introduces a new mechinism to use eoi to track interrupt:
>>>> When delivering an interrupt to vcpu, the pending_eoi set to number of
>>>> vcpu that received the interrupt. And decrease it when each vcpu writing
>>>> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
>>>> write eoi.
>>>> 
>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>> ---
>>>>  virt/kvm/ioapic.c |   40 +++++++++++++++++++++++++++++++++++++++- 1
>>>>  files changed, 39 insertions(+), 1 deletions(-)
>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>> index c991e58..df16daf 100644
>>>> --- a/virt/kvm/ioapic.c
>>>> +++ b/virt/kvm/ioapic.c
>>>> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic
> *ioapic)
>>>>  	ioapic->rtc_status.pending_eoi = pending_eoi;
>>>>  }
>>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>>>> +			struct rtc_status *rtc_status, int irq)
>>>> +{
>>>> +	if (irq != RTC_GSI)
>>>> +		return;
>>>> +
>>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
>>>> +		--rtc_status->pending_eoi;
>>>> +
>>>> +	WARN_ON(rtc_status->pending_eoi < 0);
>>>> +}
>>>> +
>>>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
>>>> +{
>>>> +	if (irq != RTC_GSI)
>>>> +		return false;
>>>> +
>>>> +	if (ioapic->rtc_status.pending_eoi > 0)
>>>> +		return true; /* coalesced */
>>>> +
>>>> +	return false;
>>>> +}
>>>> +
>>>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
>>>>  {
>>>>  	union kvm_ioapic_redirect_entry *pent;
>>>> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
>>> irq)
>>>>  {
>>>>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
>>>>  	struct kvm_lapic_irq irqe;
>>>> +	int ret;
>>>> 
>>>>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
>>>>  		     "vector=%x trig_mode=%x\n",
>>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic,
> int
>>> irq)
>>>>  	irqe.level = 1;
>>>>  	irqe.shorthand = 0;
>>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
>>>> +	if (irq == RTC_GSI) {
>>>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>>>> +				ioapic->rtc_status.dest_map);
>>>> +		ioapic->rtc_status.pending_eoi = ret;
>>> We should track status only if IRQ_STATUS ioctl was used to inject an
>>> interrupt.
>> We already know RTC will use IRQ_STATUS ioctl. Why check it again?
>> 
> QEMU does. QEMU is not the only userspace.
And this will break other userspace.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 22, 2013, 8:30 a.m. UTC | #5
On Fri, Mar 22, 2013 at 08:25:21AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-03-22:
> > On Fri, Mar 22, 2013 at 08:05:27AM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-03-22:
> >>> On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
> >>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>> 
> >>>> Current interrupt coalescing logci which only used by RTC has conflict
> >>>> with Posted Interrupt.
> >>>> This patch introduces a new mechinism to use eoi to track interrupt:
> >>>> When delivering an interrupt to vcpu, the pending_eoi set to number of
> >>>> vcpu that received the interrupt. And decrease it when each vcpu writing
> >>>> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
> >>>> write eoi.
> >>>> 
> >>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>> ---
> >>>>  virt/kvm/ioapic.c |   40 +++++++++++++++++++++++++++++++++++++++- 1
> >>>>  files changed, 39 insertions(+), 1 deletions(-)
> >>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >>>> index c991e58..df16daf 100644
> >>>> --- a/virt/kvm/ioapic.c
> >>>> +++ b/virt/kvm/ioapic.c
> >>>> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic
> > *ioapic)
> >>>>  	ioapic->rtc_status.pending_eoi = pending_eoi;
> >>>>  }
> >>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> >>>> +			struct rtc_status *rtc_status, int irq)
> >>>> +{
> >>>> +	if (irq != RTC_GSI)
> >>>> +		return;
> >>>> +
> >>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
> >>>> +		--rtc_status->pending_eoi;
> >>>> +
> >>>> +	WARN_ON(rtc_status->pending_eoi < 0);
> >>>> +}
> >>>> +
> >>>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
> >>>> +{
> >>>> +	if (irq != RTC_GSI)
> >>>> +		return false;
> >>>> +
> >>>> +	if (ioapic->rtc_status.pending_eoi > 0)
> >>>> +		return true; /* coalesced */
> >>>> +
> >>>> +	return false;
> >>>> +}
> >>>> +
> >>>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
> >>>>  {
> >>>>  	union kvm_ioapic_redirect_entry *pent;
> >>>> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
> >>> irq)
> >>>>  {
> >>>>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
> >>>>  	struct kvm_lapic_irq irqe;
> >>>> +	int ret;
> >>>> 
> >>>>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
> >>>>  		     "vector=%x trig_mode=%x\n",
> >>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic,
> > int
> >>> irq)
> >>>>  	irqe.level = 1;
> >>>>  	irqe.shorthand = 0;
> >>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
> >>>> +	if (irq == RTC_GSI) {
> >>>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> >>>> +				ioapic->rtc_status.dest_map);
> >>>> +		ioapic->rtc_status.pending_eoi = ret;
> >>> We should track status only if IRQ_STATUS ioctl was used to inject an
> >>> interrupt.
> >> We already know RTC will use IRQ_STATUS ioctl. Why check it again?
> >> 
> > QEMU does. QEMU is not the only userspace.
> And this will break other userspace.
> 
How?

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 22, 2013, 8:37 a.m. UTC | #6
Gleb Natapov wrote on 2013-03-22:
> On Fri, Mar 22, 2013 at 08:25:21AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-03-22:
>>> On Fri, Mar 22, 2013 at 08:05:27AM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2013-03-22:
>>>>> On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>> 
>>>>>> Current interrupt coalescing logci which only used by RTC has conflict
>>>>>> with Posted Interrupt.
>>>>>> This patch introduces a new mechinism to use eoi to track interrupt:
>>>>>> When delivering an interrupt to vcpu, the pending_eoi set to number of
>>>>>> vcpu that received the interrupt. And decrease it when each vcpu writing
>>>>>> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
>>>>>> write eoi.
>>>>>> 
>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>> ---
>>>>>>  virt/kvm/ioapic.c |   40 +++++++++++++++++++++++++++++++++++++++-
>>>>>>  1 files changed, 39 insertions(+), 1 deletions(-)
>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>>>> index c991e58..df16daf 100644
>>>>>> --- a/virt/kvm/ioapic.c
>>>>>> +++ b/virt/kvm/ioapic.c
>>>>>> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic
>>> *ioapic)
>>>>>>  	ioapic->rtc_status.pending_eoi = pending_eoi;
>>>>>>  }
>>>>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>>>>>> +			struct rtc_status *rtc_status, int irq)
>>>>>> +{
>>>>>> +	if (irq != RTC_GSI)
>>>>>> +		return;
>>>>>> +
>>>>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
>>>>>> +		--rtc_status->pending_eoi;
>>>>>> +
>>>>>> +	WARN_ON(rtc_status->pending_eoi < 0);
>>>>>> +}
>>>>>> +
>>>>>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
>>>>>> +{
>>>>>> +	if (irq != RTC_GSI)
>>>>>> +		return false;
>>>>>> +
>>>>>> +	if (ioapic->rtc_status.pending_eoi > 0)
>>>>>> +		return true; /* coalesced */
>>>>>> +
>>>>>> +	return false;
>>>>>> +}
>>>>>> +
>>>>>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
>>>>>>  {
>>>>>>  	union kvm_ioapic_redirect_entry *pent;
>>>>>> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic,
> int
>>>>> irq)
>>>>>>  {
>>>>>>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
>>>>>>  	struct kvm_lapic_irq irqe;
>>>>>> +	int ret;
>>>>>> 
>>>>>>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
>>>>>>  		     "vector=%x trig_mode=%x\n",
>>>>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic,
>>> int
>>>>> irq)
>>>>>>  	irqe.level = 1;
>>>>>>  	irqe.shorthand = 0;
>>>>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
>>>>>> +	if (irq == RTC_GSI) {
>>>>>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>>>>>> +				ioapic->rtc_status.dest_map);
>>>>>> +		ioapic->rtc_status.pending_eoi = ret;
>>>>> We should track status only if IRQ_STATUS ioctl was used to inject an
>>>>> interrupt.
>>>> We already know RTC will use IRQ_STATUS ioctl. Why check it again?
>>>> 
>>> QEMU does. QEMU is not the only userspace.
>> And this will break other userspace.
>> 
> How?
If other userspace has the reinjection logic for RTC, but it not uses IRQ_STATUS, then it cannot get the right coalescing info. If it also use IRQ_STATUS to get coalescing info, then we don't need the IRQ_STATUS check.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 22, 2013, 8:44 a.m. UTC | #7
On Fri, Mar 22, 2013 at 08:37:22AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-03-22:
> > On Fri, Mar 22, 2013 at 08:25:21AM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-03-22:
> >>> On Fri, Mar 22, 2013 at 08:05:27AM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2013-03-22:
> >>>>> On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
> >>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>> 
> >>>>>> Current interrupt coalescing logci which only used by RTC has conflict
> >>>>>> with Posted Interrupt.
> >>>>>> This patch introduces a new mechinism to use eoi to track interrupt:
> >>>>>> When delivering an interrupt to vcpu, the pending_eoi set to number of
> >>>>>> vcpu that received the interrupt. And decrease it when each vcpu writing
> >>>>>> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
> >>>>>> write eoi.
> >>>>>> 
> >>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>> ---
> >>>>>>  virt/kvm/ioapic.c |   40 +++++++++++++++++++++++++++++++++++++++-
> >>>>>>  1 files changed, 39 insertions(+), 1 deletions(-)
> >>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >>>>>> index c991e58..df16daf 100644
> >>>>>> --- a/virt/kvm/ioapic.c
> >>>>>> +++ b/virt/kvm/ioapic.c
> >>>>>> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic
> >>> *ioapic)
> >>>>>>  	ioapic->rtc_status.pending_eoi = pending_eoi;
> >>>>>>  }
> >>>>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> >>>>>> +			struct rtc_status *rtc_status, int irq)
> >>>>>> +{
> >>>>>> +	if (irq != RTC_GSI)
> >>>>>> +		return;
> >>>>>> +
> >>>>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
> >>>>>> +		--rtc_status->pending_eoi;
> >>>>>> +
> >>>>>> +	WARN_ON(rtc_status->pending_eoi < 0);
> >>>>>> +}
> >>>>>> +
> >>>>>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
> >>>>>> +{
> >>>>>> +	if (irq != RTC_GSI)
> >>>>>> +		return false;
> >>>>>> +
> >>>>>> +	if (ioapic->rtc_status.pending_eoi > 0)
> >>>>>> +		return true; /* coalesced */
> >>>>>> +
> >>>>>> +	return false;
> >>>>>> +}
> >>>>>> +
> >>>>>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
> >>>>>>  {
> >>>>>>  	union kvm_ioapic_redirect_entry *pent;
> >>>>>> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic,
> > int
> >>>>> irq)
> >>>>>>  {
> >>>>>>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
> >>>>>>  	struct kvm_lapic_irq irqe;
> >>>>>> +	int ret;
> >>>>>> 
> >>>>>>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
> >>>>>>  		     "vector=%x trig_mode=%x\n",
> >>>>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic,
> >>> int
> >>>>> irq)
> >>>>>>  	irqe.level = 1;
> >>>>>>  	irqe.shorthand = 0;
> >>>>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
> >>>>>> +	if (irq == RTC_GSI) {
> >>>>>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> >>>>>> +				ioapic->rtc_status.dest_map);
> >>>>>> +		ioapic->rtc_status.pending_eoi = ret;
> >>>>> We should track status only if IRQ_STATUS ioctl was used to inject an
> >>>>> interrupt.
> >>>> We already know RTC will use IRQ_STATUS ioctl. Why check it again?
> >>>> 
> >>> QEMU does. QEMU is not the only userspace.
> >> And this will break other userspace.
> >> 
> > How?
> If other userspace has the reinjection logic for RTC, but it not uses IRQ_STATUS, then it cannot get the right coalescing info. If it also use IRQ_STATUS to get coalescing info, then we don't need the IRQ_STATUS check.
> 
If userspace does not care about irq status it does not use IRQ_STATUS
ioctl and we should not go extra mile to provide one. Not everyone cares
about running Windows as a guest.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 22, 2013, 8:51 a.m. UTC | #8
Gleb Natapov wrote on 2013-03-22:
> On Fri, Mar 22, 2013 at 08:37:22AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-03-22:
>>> On Fri, Mar 22, 2013 at 08:25:21AM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2013-03-22:
>>>>> On Fri, Mar 22, 2013 at 08:05:27AM +0000, Zhang, Yang Z wrote:
>>>>>> Gleb Natapov wrote on 2013-03-22:
>>>>>>> On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
>>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>> 
>>>>>>>> Current interrupt coalescing logci which only used by RTC has
>>>>>>>> conflict with Posted Interrupt. This patch introduces a new
>>>>>>>> mechinism to use eoi to track interrupt: When delivering an
>>>>>>>> interrupt to vcpu, the pending_eoi set to number of vcpu that
>>>>>>>> received the interrupt. And decrease it when each vcpu writing
>>>>>>>> eoi. No subsequent RTC interrupt can deliver to vcpu until all
>>>>>>>> vcpus write eoi.
>>>>>>>> 
>>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>> ---
>>>>>>>>  virt/kvm/ioapic.c |   40
>>>>>>>>  +++++++++++++++++++++++++++++++++++++++- 1 files changed, 39
>>>>>>>>  insertions(+), 1 deletions(-)
>>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>>>>>> index c991e58..df16daf 100644
>>>>>>>> --- a/virt/kvm/ioapic.c
>>>>>>>> +++ b/virt/kvm/ioapic.c
>>>>>>>> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic
>>>>> *ioapic)
>>>>>>>>  	ioapic->rtc_status.pending_eoi = pending_eoi;
>>>>>>>>  }
>>>>>>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>>>>>>>> +			struct rtc_status *rtc_status, int irq)
>>>>>>>> +{
>>>>>>>> +	if (irq != RTC_GSI)
>>>>>>>> +		return;
>>>>>>>> +
>>>>>>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
>>>>>>>> +		--rtc_status->pending_eoi;
>>>>>>>> +
>>>>>>>> +	WARN_ON(rtc_status->pending_eoi < 0);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
>>>>>>>> +{
>>>>>>>> +	if (irq != RTC_GSI)
>>>>>>>> +		return false;
>>>>>>>> +
>>>>>>>> +	if (ioapic->rtc_status.pending_eoi > 0)
>>>>>>>> +		return true; /* coalesced */
>>>>>>>> +
>>>>>>>> +	return false;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
>>>>>>>>  {
>>>>>>>>  	union kvm_ioapic_redirect_entry *pent;
>>>>>>>> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic
> *ioapic,
>>> int
>>>>>>> irq)
>>>>>>>>  {
>>>>>>>>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
>>>>>>>>  	struct kvm_lapic_irq irqe;
>>>>>>>> +	int ret;
>>>>>>>> 
>>>>>>>>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
>>>>>>>>  		     "vector=%x trig_mode=%x\n",
>>>>>>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic
> *ioapic,
>>>>> int
>>>>>>> irq)
>>>>>>>>  	irqe.level = 1;
>>>>>>>>  	irqe.shorthand = 0;
>>>>>>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>>>>>>>> NULL); +	if (irq == RTC_GSI) { +		ret =
>>>>>>>> kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>>>>>>>> +				ioapic->rtc_status.dest_map);
>>>>>>>> +		ioapic->rtc_status.pending_eoi = ret;
>>>>>>> We should track status only if IRQ_STATUS ioctl was used to inject an
>>>>>>> interrupt.
>>>>>> We already know RTC will use IRQ_STATUS ioctl. Why check it again?
>>>>>> 
>>>>> QEMU does. QEMU is not the only userspace.
>>>> And this will break other userspace.
>>>> 
>>> How?
>> If other userspace has the reinjection logic for RTC, but it not uses IRQ_STATUS,
> then it cannot get the right coalescing info. If it also use IRQ_STATUS to get
> coalescing info, then we don't need the IRQ_STATUS check.
>> 
> If userspace does not care about irq status it does not use IRQ_STATUS
> ioctl and we should not go extra mile to provide one. Not everyone cares
> about running Windows as a guest.
I see your point. But if no windows guest running, RTC is hardly used by other guests and the overheard can be ignore.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 22, 2013, 8:54 a.m. UTC | #9
On Fri, Mar 22, 2013 at 08:51:47AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-03-22:
> > On Fri, Mar 22, 2013 at 08:37:22AM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-03-22:
> >>> On Fri, Mar 22, 2013 at 08:25:21AM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2013-03-22:
> >>>>> On Fri, Mar 22, 2013 at 08:05:27AM +0000, Zhang, Yang Z wrote:
> >>>>>> Gleb Natapov wrote on 2013-03-22:
> >>>>>>> On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
> >>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>> 
> >>>>>>>> Current interrupt coalescing logci which only used by RTC has
> >>>>>>>> conflict with Posted Interrupt. This patch introduces a new
> >>>>>>>> mechinism to use eoi to track interrupt: When delivering an
> >>>>>>>> interrupt to vcpu, the pending_eoi set to number of vcpu that
> >>>>>>>> received the interrupt. And decrease it when each vcpu writing
> >>>>>>>> eoi. No subsequent RTC interrupt can deliver to vcpu until all
> >>>>>>>> vcpus write eoi.
> >>>>>>>> 
> >>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>> ---
> >>>>>>>>  virt/kvm/ioapic.c |   40
> >>>>>>>>  +++++++++++++++++++++++++++++++++++++++- 1 files changed, 39
> >>>>>>>>  insertions(+), 1 deletions(-)
> >>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >>>>>>>> index c991e58..df16daf 100644
> >>>>>>>> --- a/virt/kvm/ioapic.c
> >>>>>>>> +++ b/virt/kvm/ioapic.c
> >>>>>>>> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic
> >>>>> *ioapic)
> >>>>>>>>  	ioapic->rtc_status.pending_eoi = pending_eoi;
> >>>>>>>>  }
> >>>>>>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> >>>>>>>> +			struct rtc_status *rtc_status, int irq)
> >>>>>>>> +{
> >>>>>>>> +	if (irq != RTC_GSI)
> >>>>>>>> +		return;
> >>>>>>>> +
> >>>>>>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
> >>>>>>>> +		--rtc_status->pending_eoi;
> >>>>>>>> +
> >>>>>>>> +	WARN_ON(rtc_status->pending_eoi < 0);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
> >>>>>>>> +{
> >>>>>>>> +	if (irq != RTC_GSI)
> >>>>>>>> +		return false;
> >>>>>>>> +
> >>>>>>>> +	if (ioapic->rtc_status.pending_eoi > 0)
> >>>>>>>> +		return true; /* coalesced */
> >>>>>>>> +
> >>>>>>>> +	return false;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
> >>>>>>>>  {
> >>>>>>>>  	union kvm_ioapic_redirect_entry *pent;
> >>>>>>>> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic
> > *ioapic,
> >>> int
> >>>>>>> irq)
> >>>>>>>>  {
> >>>>>>>>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
> >>>>>>>>  	struct kvm_lapic_irq irqe;
> >>>>>>>> +	int ret;
> >>>>>>>> 
> >>>>>>>>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
> >>>>>>>>  		     "vector=%x trig_mode=%x\n",
> >>>>>>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic
> > *ioapic,
> >>>>> int
> >>>>>>> irq)
> >>>>>>>>  	irqe.level = 1;
> >>>>>>>>  	irqe.shorthand = 0;
> >>>>>>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> >>>>>>>> NULL); +	if (irq == RTC_GSI) { +		ret =
> >>>>>>>> kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> >>>>>>>> +				ioapic->rtc_status.dest_map);
> >>>>>>>> +		ioapic->rtc_status.pending_eoi = ret;
> >>>>>>> We should track status only if IRQ_STATUS ioctl was used to inject an
> >>>>>>> interrupt.
> >>>>>> We already know RTC will use IRQ_STATUS ioctl. Why check it again?
> >>>>>> 
> >>>>> QEMU does. QEMU is not the only userspace.
> >>>> And this will break other userspace.
> >>>> 
> >>> How?
> >> If other userspace has the reinjection logic for RTC, but it not uses IRQ_STATUS,
> > then it cannot get the right coalescing info. If it also use IRQ_STATUS to get
> > coalescing info, then we don't need the IRQ_STATUS check.
> >> 
> > If userspace does not care about irq status it does not use IRQ_STATUS
> > ioctl and we should not go extra mile to provide one. Not everyone cares
> > about running Windows as a guest.
> I see your point. But if no windows guest running, RTC is hardly used by other guests and the overheard can be ignore.
> 
Anyone can use RTC is Linux guest. Don't know about others.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 22, 2013, 10:50 a.m. UTC | #10
Gleb Natapov wrote on 2013-03-22:
> On Fri, Mar 22, 2013 at 08:51:47AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-03-22:
>>> On Fri, Mar 22, 2013 at 08:37:22AM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2013-03-22:
>>>>> On Fri, Mar 22, 2013 at 08:25:21AM +0000, Zhang, Yang Z wrote:
>>>>>> Gleb Natapov wrote on 2013-03-22:
>>>>>>> On Fri, Mar 22, 2013 at 08:05:27AM +0000, Zhang, Yang Z wrote:
>>>>>>>> Gleb Natapov wrote on 2013-03-22:
>>>>>>>>> On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
>>>>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>>>> 
>>>>>>>>>> Current interrupt coalescing logci which only used by RTC has
>>>>>>>>>> conflict with Posted Interrupt. This patch introduces a new
>>>>>>>>>> mechinism to use eoi to track interrupt: When delivering an
>>>>>>>>>> interrupt to vcpu, the pending_eoi set to number of vcpu that
>>>>>>>>>> received the interrupt. And decrease it when each vcpu writing
>>>>>>>>>> eoi. No subsequent RTC interrupt can deliver to vcpu until all
>>>>>>>>>> vcpus write eoi.
>>>>>>>>>> 
>>>>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>>>>>>>> ---
>>>>>>>>>>  virt/kvm/ioapic.c |   40
>>>>>>>>>>  +++++++++++++++++++++++++++++++++++++++- 1 files changed, 39
>>>>>>>>>>  insertions(+), 1 deletions(-)
>>>>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>>>>>>>> index c991e58..df16daf 100644
>>>>>>>>>> --- a/virt/kvm/ioapic.c
>>>>>>>>>> +++ b/virt/kvm/ioapic.c
>>>>>>>>>> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic
>>>>>>> *ioapic)
>>>>>>>>>>  	ioapic->rtc_status.pending_eoi = pending_eoi;
>>>>>>>>>>  }
>>>>>>>>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>>>>>>>>>> +			struct rtc_status *rtc_status, int irq)
>>>>>>>>>> +{
>>>>>>>>>> +	if (irq != RTC_GSI)
>>>>>>>>>> +		return;
>>>>>>>>>> +
>>>>>>>>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
>>>>>>>>>> +		--rtc_status->pending_eoi;
>>>>>>>>>> +
>>>>>>>>>> +	WARN_ON(rtc_status->pending_eoi < 0);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
>>>>>>>>>> +{
>>>>>>>>>> +	if (irq != RTC_GSI)
>>>>>>>>>> +		return false;
>>>>>>>>>> +
>>>>>>>>>> +	if (ioapic->rtc_status.pending_eoi > 0)
>>>>>>>>>> +		return true; /* coalesced */
>>>>>>>>>> +
>>>>>>>>>> +	return false;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
>>>>>>>>>>  {
>>>>>>>>>>  	union kvm_ioapic_redirect_entry *pent;
>>>>>>>>>> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic
>>> *ioapic,
>>>>> int
>>>>>>>>> irq)
>>>>>>>>>>  {
>>>>>>>>>>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
>>>>>>>>>>  	struct kvm_lapic_irq irqe;
>>>>>>>>>> +	int ret;
>>>>>>>>>> 
>>>>>>>>>>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
>>>>>>>>>>  		     "vector=%x trig_mode=%x\n",
>>>>>>>>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic
>>> *ioapic,
>>>>>>> int
>>>>>>>>> irq)
>>>>>>>>>>  	irqe.level = 1;
>>>>>>>>>>  	irqe.shorthand = 0;
>>>>>>>>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>>>>>>>>>> NULL); +	if (irq == RTC_GSI) { +		ret =
>>>>>>>>>> kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>>>>>>>>>> +				ioapic->rtc_status.dest_map);
>>>>>>>>>> +		ioapic->rtc_status.pending_eoi = ret;
>>>>>>>>> We should track status only if IRQ_STATUS ioctl was used to inject an
>>>>>>>>> interrupt.
>>>>>>>> We already know RTC will use IRQ_STATUS ioctl. Why check it again?
>>>>>>>> 
>>>>>>> QEMU does. QEMU is not the only userspace.
>>>>>> And this will break other userspace.
>>>>>> 
>>>>> How?
>>>> If other userspace has the reinjection logic for RTC, but it not uses
> IRQ_STATUS,
>>> then it cannot get the right coalescing info. If it also use IRQ_STATUS to get
>>> coalescing info, then we don't need the IRQ_STATUS check.
>>>> 
>>> If userspace does not care about irq status it does not use IRQ_STATUS
>>> ioctl and we should not go extra mile to provide one. Not everyone cares
>>> about running Windows as a guest.
>> I see your point. But if no windows guest running, RTC is hardly used
>> by other guests and the overheard can be ignore.
>> 
> Anyone can use RTC is Linux guest. Don't know about others.
I see.
Since pass IRQ_STATUS to ioapic need to change many functions, how about add a variable in rtc_status:
struct rtc_status {
      bool IRQ_STATUS
};

And set it in kvm_vm_ioctl():
case KVM_IRQ_LINE_STATUS:
	if(irq == RTC_GSI && ioapic)
		ioapic->rtc_status.IRQ_STATUS = true;

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 24, 2013, 9:20 a.m. UTC | #11
On Fri, Mar 22, 2013 at 10:50:55AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-03-22:
> > On Fri, Mar 22, 2013 at 08:51:47AM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-03-22:
> >>> On Fri, Mar 22, 2013 at 08:37:22AM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2013-03-22:
> >>>>> On Fri, Mar 22, 2013 at 08:25:21AM +0000, Zhang, Yang Z wrote:
> >>>>>> Gleb Natapov wrote on 2013-03-22:
> >>>>>>> On Fri, Mar 22, 2013 at 08:05:27AM +0000, Zhang, Yang Z wrote:
> >>>>>>>> Gleb Natapov wrote on 2013-03-22:
> >>>>>>>>> On Fri, Mar 22, 2013 at 01:24:05PM +0800, Yang Zhang wrote:
> >>>>>>>>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>>>> 
> >>>>>>>>>> Current interrupt coalescing logci which only used by RTC has
> >>>>>>>>>> conflict with Posted Interrupt. This patch introduces a new
> >>>>>>>>>> mechinism to use eoi to track interrupt: When delivering an
> >>>>>>>>>> interrupt to vcpu, the pending_eoi set to number of vcpu that
> >>>>>>>>>> received the interrupt. And decrease it when each vcpu writing
> >>>>>>>>>> eoi. No subsequent RTC interrupt can deliver to vcpu until all
> >>>>>>>>>> vcpus write eoi.
> >>>>>>>>>> 
> >>>>>>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>>>>>>>> ---
> >>>>>>>>>>  virt/kvm/ioapic.c |   40
> >>>>>>>>>>  +++++++++++++++++++++++++++++++++++++++- 1 files changed, 39
> >>>>>>>>>>  insertions(+), 1 deletions(-)
> >>>>>>>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >>>>>>>>>> index c991e58..df16daf 100644
> >>>>>>>>>> --- a/virt/kvm/ioapic.c
> >>>>>>>>>> +++ b/virt/kvm/ioapic.c
> >>>>>>>>>> @@ -114,6 +114,29 @@ static void rtc_irq_restore(struct kvm_ioapic
> >>>>>>> *ioapic)
> >>>>>>>>>>  	ioapic->rtc_status.pending_eoi = pending_eoi;
> >>>>>>>>>>  }
> >>>>>>>>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> >>>>>>>>>> +			struct rtc_status *rtc_status, int irq)
> >>>>>>>>>> +{
> >>>>>>>>>> +	if (irq != RTC_GSI)
> >>>>>>>>>> +		return;
> >>>>>>>>>> +
> >>>>>>>>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
> >>>>>>>>>> +		--rtc_status->pending_eoi;
> >>>>>>>>>> +
> >>>>>>>>>> +	WARN_ON(rtc_status->pending_eoi < 0);
> >>>>>>>>>> +}
> >>>>>>>>>> +
> >>>>>>>>>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
> >>>>>>>>>> +{
> >>>>>>>>>> +	if (irq != RTC_GSI)
> >>>>>>>>>> +		return false;
> >>>>>>>>>> +
> >>>>>>>>>> +	if (ioapic->rtc_status.pending_eoi > 0)
> >>>>>>>>>> +		return true; /* coalesced */
> >>>>>>>>>> +
> >>>>>>>>>> +	return false;
> >>>>>>>>>> +}
> >>>>>>>>>> +
> >>>>>>>>>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
> >>>>>>>>>>  {
> >>>>>>>>>>  	union kvm_ioapic_redirect_entry *pent;
> >>>>>>>>>> @@ -229,6 +252,7 @@ static int ioapic_deliver(struct kvm_ioapic
> >>> *ioapic,
> >>>>> int
> >>>>>>>>> irq)
> >>>>>>>>>>  {
> >>>>>>>>>>  	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
> >>>>>>>>>>  	struct kvm_lapic_irq irqe;
> >>>>>>>>>> +	int ret;
> >>>>>>>>>> 
> >>>>>>>>>>  	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
> >>>>>>>>>>  		     "vector=%x trig_mode=%x\n",
> >>>>>>>>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic
> >>> *ioapic,
> >>>>>>> int
> >>>>>>>>> irq)
> >>>>>>>>>>  	irqe.level = 1;
> >>>>>>>>>>  	irqe.shorthand = 0;
> >>>>>>>>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> >>>>>>>>>> NULL); +	if (irq == RTC_GSI) { +		ret =
> >>>>>>>>>> kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> >>>>>>>>>> +				ioapic->rtc_status.dest_map);
> >>>>>>>>>> +		ioapic->rtc_status.pending_eoi = ret;
> >>>>>>>>> We should track status only if IRQ_STATUS ioctl was used to inject an
> >>>>>>>>> interrupt.
> >>>>>>>> We already know RTC will use IRQ_STATUS ioctl. Why check it again?
> >>>>>>>> 
> >>>>>>> QEMU does. QEMU is not the only userspace.
> >>>>>> And this will break other userspace.
> >>>>>> 
> >>>>> How?
> >>>> If other userspace has the reinjection logic for RTC, but it not uses
> > IRQ_STATUS,
> >>> then it cannot get the right coalescing info. If it also use IRQ_STATUS to get
> >>> coalescing info, then we don't need the IRQ_STATUS check.
> >>>> 
> >>> If userspace does not care about irq status it does not use IRQ_STATUS
> >>> ioctl and we should not go extra mile to provide one. Not everyone cares
> >>> about running Windows as a guest.
> >> I see your point. But if no windows guest running, RTC is hardly used
> >> by other guests and the overheard can be ignore.
> >> 
> > Anyone can use RTC is Linux guest. Don't know about others.
> I see.
> Since pass IRQ_STATUS to ioapic need to change many functions, how about add a variable in rtc_status:
> struct rtc_status {
>       bool IRQ_STATUS
> };
> 
> And set it in kvm_vm_ioctl():
> case KVM_IRQ_LINE_STATUS:
> 	if(irq == RTC_GSI && ioapic)
> 		ioapic->rtc_status.IRQ_STATUS = true;
> 
I do not like that kind of hacks. Just put functions change in separate,
easy to review patch.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini March 26, 2013, 2:14 p.m. UTC | #12
Il 22/03/2013 06:24, Yang Zhang ha scritto:
> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> +			struct rtc_status *rtc_status, int irq)
> +{
> +	if (irq != RTC_GSI)
> +		return;
> +
> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
> +		--rtc_status->pending_eoi;
> +
> +	WARN_ON(rtc_status->pending_eoi < 0);
> +}

This is the only case where you're passing the struct rtc_status instead
of the struct kvm_ioapic.  Please use the latter, and make it the first
argument.

> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
>  	irqe.level = 1;
>  	irqe.shorthand = 0;
>  
> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
> +	if (irq == RTC_GSI) {
> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> +				ioapic->rtc_status.dest_map);
> +		ioapic->rtc_status.pending_eoi = ret;

I think you should either add a

    BUG_ON(ioapic->rtc_status.pending_eoi != 0);

or use "ioapic->rtc_status.pending_eoi += ret" (or both).

> +	} else
> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
> +
> +	return ret;
>  }


Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini March 26, 2013, 2:26 p.m. UTC | #13
Il 22/03/2013 09:54, Gleb Natapov ha scritto:
>>>> > >> 
>>> > > If userspace does not care about irq status it does not use IRQ_STATUS
>>> > > ioctl and we should not go extra mile to provide one. Not everyone cares
>>> > > about running Windows as a guest.
>> > I see your point. But if no windows guest running, RTC is hardly used by other guests and the overheard can be ignore.
>> > 
> Anyone can use RTC is Linux guest. Don't know about others.

I do not think the additional code complexity is worth the speedup.

The KVM Userspace That Won't Be Merged doesn't even emulate the RTC
interrupt at all.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 29, 2013, 3:25 a.m. UTC | #14
Paolo Bonzini wrote on 2013-03-26:
> Il 22/03/2013 06:24, Yang Zhang ha scritto:
>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>> +			struct rtc_status *rtc_status, int irq)
>> +{
>> +	if (irq != RTC_GSI)
>> +		return;
>> +
>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
>> +		--rtc_status->pending_eoi;
>> +
>> +	WARN_ON(rtc_status->pending_eoi < 0);
>> +}
> 
> This is the only case where you're passing the struct rtc_status instead
> of the struct kvm_ioapic.  Please use the latter, and make it the first
> argument.
>
>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
> irq)
>>  	irqe.level = 1;
>>  	irqe.shorthand = 0;
>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
>> +	if (irq == RTC_GSI) {
>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>> +				ioapic->rtc_status.dest_map);
>> +		ioapic->rtc_status.pending_eoi = ret;
> 
> I think you should either add a
> 
>     BUG_ON(ioapic->rtc_status.pending_eoi != 0);
> or use "ioapic->rtc_status.pending_eoi += ret" (or both).
> 
There may malicious guest to write EOI more than once. And the pending_eoi will be negative. But it should not be a bug. Just WARN_ON is enough. And we already do it in ack_eoi. So don't need to do duplicated thing here.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini March 29, 2013, 8:35 a.m. UTC | #15
Il 29/03/2013 04:25, Zhang, Yang Z ha scritto:
> Paolo Bonzini wrote on 2013-03-26:
>> Il 22/03/2013 06:24, Yang Zhang ha scritto:
>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>>> +			struct rtc_status *rtc_status, int irq)
>>> +{
>>> +	if (irq != RTC_GSI)
>>> +		return;
>>> +
>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
>>> +		--rtc_status->pending_eoi;
>>> +
>>> +	WARN_ON(rtc_status->pending_eoi < 0);
>>> +}
>>
>> This is the only case where you're passing the struct rtc_status instead
>> of the struct kvm_ioapic.  Please use the latter, and make it the first
>> argument.
>>
>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
>> irq)
>>>  	irqe.level = 1;
>>>  	irqe.shorthand = 0;
>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
>>> +	if (irq == RTC_GSI) {
>>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>>> +				ioapic->rtc_status.dest_map);
>>> +		ioapic->rtc_status.pending_eoi = ret;
>>
>> I think you should either add a
>>
>>     BUG_ON(ioapic->rtc_status.pending_eoi != 0);
>> or use "ioapic->rtc_status.pending_eoi += ret" (or both).
>>
> There may malicious guest to write EOI more than once. And the
> pending_eoi will be negative. But it should not be a bug. Just WARN_ON
> is enough. And we already do it in ack_eoi. So don't need to do
> duplicated thing here.

Even WARN_ON is too much if it is guest-triggerable.  But then it is
better to make it "+=", I think.

Paolo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 29, 2013, 8:42 a.m. UTC | #16
Paolo Bonzini wrote on 2013-03-29:
> Il 29/03/2013 04:25, Zhang, Yang Z ha scritto:
>> Paolo Bonzini wrote on 2013-03-26:
>>> Il 22/03/2013 06:24, Yang Zhang ha scritto:
>>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>>>> +			struct rtc_status *rtc_status, int irq)
>>>> +{
>>>> +	if (irq != RTC_GSI)
>>>> +		return;
>>>> +
>>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
>>>> +		--rtc_status->pending_eoi;
>>>> +
>>>> +	WARN_ON(rtc_status->pending_eoi < 0);
>>>> +}
>>> 
>>> This is the only case where you're passing the struct rtc_status instead
>>> of the struct kvm_ioapic.  Please use the latter, and make it the first
>>> argument.
>>> 
>>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
>>> irq)
>>>>  	irqe.level = 1;
>>>>  	irqe.shorthand = 0;
>>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
>>>> +	if (irq == RTC_GSI) {
>>>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>>>> +				ioapic->rtc_status.dest_map);
>>>> +		ioapic->rtc_status.pending_eoi = ret;
>>> 
>>> I think you should either add a
>>> 
>>>     BUG_ON(ioapic->rtc_status.pending_eoi != 0);
>>> or use "ioapic->rtc_status.pending_eoi += ret" (or both).
>>> 
>> There may malicious guest to write EOI more than once. And the
>> pending_eoi will be negative. But it should not be a bug. Just WARN_ON
>> is enough. And we already do it in ack_eoi. So don't need to do
>> duplicated thing here.
> 
> Even WARN_ON is too much if it is guest-triggerable.  But then it is
> better to make it "+=", I think.
No. If the above case happened, you will always hit the WARN_ON with "+=". 

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov April 2, 2013, 1:08 p.m. UTC | #17
On Fri, Mar 29, 2013 at 03:25:16AM +0000, Zhang, Yang Z wrote:
> Paolo Bonzini wrote on 2013-03-26:
> > Il 22/03/2013 06:24, Yang Zhang ha scritto:
> >> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> >> +			struct rtc_status *rtc_status, int irq)
> >> +{
> >> +	if (irq != RTC_GSI)
> >> +		return;
> >> +
> >> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
> >> +		--rtc_status->pending_eoi;
> >> +
> >> +	WARN_ON(rtc_status->pending_eoi < 0);
> >> +}
> > 
> > This is the only case where you're passing the struct rtc_status instead
> > of the struct kvm_ioapic.  Please use the latter, and make it the first
> > argument.
> >
> >> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
> > irq)
> >>  	irqe.level = 1;
> >>  	irqe.shorthand = 0;
> >> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
> >> +	if (irq == RTC_GSI) {
> >> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> >> +				ioapic->rtc_status.dest_map);
> >> +		ioapic->rtc_status.pending_eoi = ret;
> > 
> > I think you should either add a
> > 
> >     BUG_ON(ioapic->rtc_status.pending_eoi != 0);
> > or use "ioapic->rtc_status.pending_eoi += ret" (or both).
> > 
> There may malicious guest to write EOI more than once. And the pending_eoi will be negative. But it should not be a bug. Just WARN_ON is enough. And we already do it in ack_eoi. So don't need to do duplicated thing here.
> 
Since we track vcpus that already called EOI and decrement pending_eoi
only once for each vcpu malicious guest cannot trigger it, but we
already do WARN_ON() in rtc_irq_ack_eoi(), so I am not sure we need
another one here. += will be correct (since pending_eoi == 0 here), but
confusing since it makes an impression that pending_eoi may not be zero.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z April 3, 2013, 12:21 a.m. UTC | #18
Gleb Natapov wrote on 2013-04-02:
> On Fri, Mar 29, 2013 at 03:25:16AM +0000, Zhang, Yang Z wrote:
>> Paolo Bonzini wrote on 2013-03-26:
>>> Il 22/03/2013 06:24, Yang Zhang ha scritto:
>>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>>>> +			struct rtc_status *rtc_status, int irq)
>>>> +{
>>>> +	if (irq != RTC_GSI)
>>>> +		return;
>>>> +
>>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
>>>> +		--rtc_status->pending_eoi;
>>>> +
>>>> +	WARN_ON(rtc_status->pending_eoi < 0);
>>>> +}
>>> 
>>> This is the only case where you're passing the struct rtc_status instead
>>> of the struct kvm_ioapic.  Please use the latter, and make it the first
>>> argument.
>>> 
>>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic,
> int
>>> irq)
>>>>  	irqe.level = 1;
>>>>  	irqe.shorthand = 0;
>>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
>>>> +	if (irq == RTC_GSI) {
>>>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
>>>> +				ioapic->rtc_status.dest_map);
>>>> +		ioapic->rtc_status.pending_eoi = ret;
>>> 
>>> I think you should either add a
>>> 
>>>     BUG_ON(ioapic->rtc_status.pending_eoi != 0);
>>> or use "ioapic->rtc_status.pending_eoi += ret" (or both).
>>> 
>> There may malicious guest to write EOI more than once. And the pending_eoi
> will be negative. But it should not be a bug. Just WARN_ON is enough. And we
> already do it in ack_eoi. So don't need to do duplicated thing here.
>> 
> Since we track vcpus that already called EOI and decrement pending_eoi
> only once for each vcpu malicious guest cannot trigger it, but we
> already do WARN_ON() in rtc_irq_ack_eoi(), so I am not sure we need
> another one here. += will be correct (since pending_eoi == 0 here), but
> confusing since it makes an impression that pending_eoi may not be zero.
Yes, I also make the wrong impression.
With previous implementation, the pening_eoi may not be zero: Calculate the destination vcpu via parse IOAPIC entry, and if using lowest priority deliver mode, set all possible vcpus in dest_map even it doesn't receive it finally. At same time, a malicious guest can send IPI with same vector of RTC to those vcpus who is in dest_map but not have RTC interrupt. Then the pending_eoi will be negative.
Now, we set the dest_map with the vcpus who really received the interrupt. The above case cannot happen. So as you and Paolo suggested, it is better to use +=.

Best regards,
Yang

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov April 3, 2013, 4:03 a.m. UTC | #19
On Wed, Apr 03, 2013 at 12:21:05AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-04-02:
> > On Fri, Mar 29, 2013 at 03:25:16AM +0000, Zhang, Yang Z wrote:
> >> Paolo Bonzini wrote on 2013-03-26:
> >>> Il 22/03/2013 06:24, Yang Zhang ha scritto:
> >>>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> >>>> +			struct rtc_status *rtc_status, int irq)
> >>>> +{
> >>>> +	if (irq != RTC_GSI)
> >>>> +		return;
> >>>> +
> >>>> +	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
> >>>> +		--rtc_status->pending_eoi;
> >>>> +
> >>>> +	WARN_ON(rtc_status->pending_eoi < 0);
> >>>> +}
> >>> 
> >>> This is the only case where you're passing the struct rtc_status instead
> >>> of the struct kvm_ioapic.  Please use the latter, and make it the first
> >>> argument.
> >>> 
> >>>> @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic,
> > int
> >>> irq)
> >>>>  	irqe.level = 1;
> >>>>  	irqe.shorthand = 0;
> >>>> -	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
> >>>> +	if (irq == RTC_GSI) {
> >>>> +		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
> >>>> +				ioapic->rtc_status.dest_map);
> >>>> +		ioapic->rtc_status.pending_eoi = ret;
> >>> 
> >>> I think you should either add a
> >>> 
> >>>     BUG_ON(ioapic->rtc_status.pending_eoi != 0);
> >>> or use "ioapic->rtc_status.pending_eoi += ret" (or both).
> >>> 
> >> There may malicious guest to write EOI more than once. And the pending_eoi
> > will be negative. But it should not be a bug. Just WARN_ON is enough. And we
> > already do it in ack_eoi. So don't need to do duplicated thing here.
> >> 
> > Since we track vcpus that already called EOI and decrement pending_eoi
> > only once for each vcpu malicious guest cannot trigger it, but we
> > already do WARN_ON() in rtc_irq_ack_eoi(), so I am not sure we need
> > another one here. += will be correct (since pending_eoi == 0 here), but
> > confusing since it makes an impression that pending_eoi may not be zero.
> Yes, I also make the wrong impression.
> With previous implementation, the pening_eoi may not be zero: Calculate the destination vcpu via parse IOAPIC entry, and if using lowest priority deliver mode, set all possible vcpus in dest_map even it doesn't receive it finally. At same time, a malicious guest can send IPI with same vector of RTC to those vcpus who is in dest_map but not have RTC interrupt. Then the pending_eoi will be negative.
> Now, we set the dest_map with the vcpus who really received the interrupt. The above case cannot happen. So as you and Paolo suggested, it is better to use +=.
> 
I am not suggesting that it is better to use +=. We can add
BUG_ON(ioapic->rtc_status.pending_eoi != 0); but no need to resend
patches just for that.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c991e58..df16daf 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -114,6 +114,29 @@  static void rtc_irq_restore(struct kvm_ioapic *ioapic)
 	ioapic->rtc_status.pending_eoi = pending_eoi;
 }
 
+static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
+			struct rtc_status *rtc_status, int irq)
+{
+	if (irq != RTC_GSI)
+		return;
+
+	if (test_and_clear_bit(vcpu->vcpu_id, rtc_status->dest_map))
+		--rtc_status->pending_eoi;
+
+	WARN_ON(rtc_status->pending_eoi < 0);
+}
+
+static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
+{
+	if (irq != RTC_GSI)
+		return false;
+
+	if (ioapic->rtc_status.pending_eoi > 0)
+		return true; /* coalesced */
+
+	return false;
+}
+
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
 	union kvm_ioapic_redirect_entry *pent;
@@ -229,6 +252,7 @@  static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
 	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
 	struct kvm_lapic_irq irqe;
+	int ret;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
@@ -244,7 +268,14 @@  static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	irqe.level = 1;
 	irqe.shorthand = 0;
 
-	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+	if (irq == RTC_GSI) {
+		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
+				ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi = ret;
+	} else
+		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+
+	return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
@@ -268,6 +299,11 @@  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		ret = 1;
 	} else {
 		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+		if (rtc_irq_check(ioapic, irq)) {
+			ret = 0; /* coalesced */
+			goto out;
+		}
 		ioapic->irr |= mask;
 		if ((edge && old_irr != ioapic->irr) ||
 		    (!edge && !entry.fields.remote_irr))
@@ -275,6 +311,7 @@  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		else
 			ret = 0; /* report coalesced interrupt */
 	}
+out:
 	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	spin_unlock(&ioapic->lock);
 
@@ -302,6 +339,7 @@  static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 		if (ent->fields.vector != vector)
 			continue;
 
+		rtc_irq_ack_eoi(vcpu, &ioapic->rtc_status, i);
 		/*
 		 * We are dropping lock while calling ack notifiers because ack
 		 * notifier callbacks for assigned devices call into IOAPIC