diff mbox

[v2,8/8] KVM: Use eoi to track RTC interrupt delivery status

Message ID 1363591479-4653-9-git-send-email-yang.z.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhang, Yang Z March 18, 2013, 7:24 a.m. UTC
From: Yang Zhang <yang.z.zhang@Intel.com>

The current interrupt coalescing logic, which is only used by the RTC,
conflicts with Posted Interrupts.
This patch introduces a new mechanism that uses EOI to track interrupt
delivery: when an interrupt is delivered, need_eoi is set to the number of
vcpus that received the interrupt, and it is decremented each time one of
those vcpus writes EOI. No subsequent RTC interrupt can be delivered to a
vcpu until all vcpus have written EOI.

Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
---
 virt/kvm/ioapic.c |   67 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 67 insertions(+), 0 deletions(-)

Comments

Gleb Natapov March 18, 2013, 10:11 a.m. UTC | #1
On Mon, Mar 18, 2013 at 03:24:39PM +0800, Yang Zhang wrote:
> From: Yang Zhang <yang.z.zhang@Intel.com>
> 
> Current interrupt coalescing logci which only used by RTC has conflict
> with Posted Interrupt.
> This patch introduces a new mechinism to use eoi to track interrupt:
> When delivering an interrupt to vcpu, the need_eoi set to number of
> vcpu that received the interrupt. And decrease it when each vcpu writing
> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
> write eoi.
> 
> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> ---
>  virt/kvm/ioapic.c |   67 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 67 insertions(+), 0 deletions(-)
> 
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index 7e47da8..8d498e5 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -130,6 +130,48 @@ static void rtc_irq_get_dest_vcpu(struct kvm_ioapic *ioapic, int irq)
>  	kvm_get_dest_vcpu(ioapic->kvm, &irqe, ioapic->rtc_status.vcpu_map);
>  }
>  
> +static void rtc_irq_set_eoi(struct kvm_ioapic *ioapic, int irq)
> +{
> +	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
> +
> +	if (irq != 8)
> +		return;
> +
> +	if (likely(!bitmap_empty(ioapic->rtc_status.vcpu_map, KVM_MAX_VCPUS))) {
> +		if (entry->fields.delivery_mode == APIC_DM_LOWEST)
> +			ioapic->rtc_status.need_eoi = 1;
> +		else {
> +			int weight;
> +			weight = bitmap_weight(ioapic->rtc_status.vcpu_map,
> +					sizeof(ioapic->rtc_status.vcpu_map));
> +			ioapic->rtc_status.need_eoi = weight;
> +		}
> +	}
> +}
> +
> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> +			struct rtc_status *rtc_status, int irq)
> +{
> +	if (irq != 8)
> +		return;
> +
> +	if (test_bit(vcpu->vcpu_id, rtc_status->vcpu_map))
If you do not use test_and_clear_bit() here, the WARN_ON() below can
be triggered by a malicious guest. Let's define an rtc_status->expected_eoi
bitmap and copy vcpu_map into expected_eoi on each RTC irq.

> +		--rtc_status->need_eoi;
> +
> +	WARN_ON(rtc_status->need_eoi < 0);
> +}
> +
> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
> +{
> +	if (irq != 8)
> +		return false;
> +
> +	if (ioapic->rtc_status.need_eoi > 0)
> +		return true; /* coalesced */
> +
> +	return false;
> +}
> +
>  #else
>  
>  static void rtc_irq_reset(struct kvm_ioapic *ioapic)
> @@ -146,6 +188,22 @@ static void rtc_irq_get_dest_vcpu(struct kvm_ioapic *ioapic, int irq)
>  {
>  	return;
>  }
> +
> +static void rtc_irq_set_eoi(struct kvm_ioapic *ioapic, int irq)
> +{
> +	return;
> +}
> +
> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
> +			struct rtc_status *rtc_status, int irq)
> +{
> +	return;
> +}
> +
> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
> +{
> +	return false;
> +}
>  #endif
>  
>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
> @@ -282,6 +340,8 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
>  	irqe.level = 1;
>  	irqe.shorthand = 0;
>  
> +	rtc_irq_set_eoi(ioapic, irq);
> +
>  	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
>  }
>  
> @@ -306,6 +366,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
>  		ret = 1;
>  	} else {
>  		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
> +
> +		if (rtc_irq_check(ioapic, irq)) {
> +			ret = 0; /* coalesced */
> +			goto out;
> +		}
>  		ioapic->irr |= mask;
>  		if ((edge && old_irr != ioapic->irr) ||
>  		    (!edge && !entry.fields.remote_irr))
> @@ -313,6 +378,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
>  		else
>  			ret = 0; /* report coalesced interrupt */
>  	}
> +out:
>  	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
>  	spin_unlock(&ioapic->lock);
>  
> @@ -340,6 +406,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
>  		if (ent->fields.vector != vector)
>  			continue;
>  
> +		rtc_irq_ack_eoi(vcpu, &ioapic->rtc_status, i);
>  		/*
>  		 * We are dropping lock while calling ack notifiers because ack
>  		 * notifier callbacks for assigned devices call into IOAPIC
> -- 
> 1.7.1

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 18, 2013, 10:49 a.m. UTC | #2
Gleb Natapov wrote on 2013-03-18:
> On Mon, Mar 18, 2013 at 03:24:39PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <yang.z.zhang@Intel.com>
>> 
>> Current interrupt coalescing logci which only used by RTC has conflict
>> with Posted Interrupt.
>> This patch introduces a new mechinism to use eoi to track interrupt:
>> When delivering an interrupt to vcpu, the need_eoi set to number of
>> vcpu that received the interrupt. And decrease it when each vcpu writing
>> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
>> write eoi.
>> 
>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>> ---
>>  virt/kvm/ioapic.c |   67
>>  +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed,
>>  67 insertions(+), 0 deletions(-)
>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>> index 7e47da8..8d498e5 100644
>> --- a/virt/kvm/ioapic.c
>> +++ b/virt/kvm/ioapic.c
>> @@ -130,6 +130,48 @@ static void rtc_irq_get_dest_vcpu(struct kvm_ioapic
> *ioapic, int irq)
>>  	kvm_get_dest_vcpu(ioapic->kvm, &irqe, ioapic->rtc_status.vcpu_map);
>>  }
>> +static void rtc_irq_set_eoi(struct kvm_ioapic *ioapic, int irq) +{
>> +	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; +
>> +	if (irq != 8) +		return; + +	if
>> (likely(!bitmap_empty(ioapic->rtc_status.vcpu_map, KVM_MAX_VCPUS))) {
>> +		if (entry->fields.delivery_mode == APIC_DM_LOWEST)
>> +			ioapic->rtc_status.need_eoi = 1; +		else { +			int weight;
>> +			weight = bitmap_weight(ioapic->rtc_status.vcpu_map,
>> +					sizeof(ioapic->rtc_status.vcpu_map));
>> +			ioapic->rtc_status.need_eoi = weight; +		} +	} +} + +static void
>> rtc_irq_ack_eoi(struct kvm_vcpu *vcpu, +			struct rtc_status
>> *rtc_status, int irq) +{ +	if (irq != 8) +		return; + +	if
>> (test_bit(vcpu->vcpu_id, rtc_status->vcpu_map))
> If you do not use test_and_clear_bit() here the WARN_ON() bellow can
> be triggered by a malicious guest. Lets define rtc_status->expected_eoi
> bitmap and copy vcpu_map into expected_eoi on each RTC irq.
Sure.
 
>> +		--rtc_status->need_eoi;
>> +
>> +	WARN_ON(rtc_status->need_eoi < 0);
>> +}
>> +
>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
>> +{
>> +	if (irq != 8)
>> +		return false;
>> +
>> +	if (ioapic->rtc_status.need_eoi > 0)
>> +		return true; /* coalesced */
>> +
>> +	return false;
>> +}
>> +
>>  #else
>>  
>>  static void rtc_irq_reset(struct kvm_ioapic *ioapic)
>> @@ -146,6 +188,22 @@ static void rtc_irq_get_dest_vcpu(struct kvm_ioapic
> *ioapic, int irq)
>>  {
>>  	return;
>>  }
>> +
>> +static void rtc_irq_set_eoi(struct kvm_ioapic *ioapic, int irq)
>> +{
>> +	return;
>> +}
>> +
>> +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
>> +			struct rtc_status *rtc_status, int irq)
>> +{
>> +	return;
>> +}
>> +
>> +static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
>> +{
>> +	return false;
>> +}
>>  #endif
>>  
>>  static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
>> @@ -282,6 +340,8 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
> irq)
>>  	irqe.level = 1;
>>  	irqe.shorthand = 0;
>> +	rtc_irq_set_eoi(ioapic, irq);
>> +
>>  	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
>>  }
>> @@ -306,6 +366,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int
> irq, int irq_source_id,
>>  		ret = 1;
>>  	} else {
>>  		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
>> +
>> +		if (rtc_irq_check(ioapic, irq)) {
>> +			ret = 0; /* coalesced */
>> +			goto out;
>> +		}
>>  		ioapic->irr |= mask;
>>  		if ((edge && old_irr != ioapic->irr) ||
>>  		    (!edge && !entry.fields.remote_irr))
>> @@ -313,6 +378,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq,
> int irq_source_id,
>>  		else 			ret = 0; /* report coalesced interrupt */ 	} +out:
>>  	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
>>  	spin_unlock(&ioapic->lock);
>> @@ -340,6 +406,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu
> *vcpu,
>>  		if (ent->fields.vector != vector)
>>  			continue;
>> +		rtc_irq_ack_eoi(vcpu, &ioapic->rtc_status, i);
>>  		/*
>>  		 * We are dropping lock while calling ack notifiers because ack
>>  		 * notifier callbacks for assigned devices call into IOAPIC
>> --
>> 1.7.1
> 
> --
> 			Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti March 19, 2013, 11:28 p.m. UTC | #3
On Mon, Mar 18, 2013 at 03:24:39PM +0800, Yang Zhang wrote:
> From: Yang Zhang <yang.z.zhang@Intel.com>
> 
> Current interrupt coalescing logci which only used by RTC has conflict
> with Posted Interrupt.
> This patch introduces a new mechinism to use eoi to track interrupt:
> When delivering an interrupt to vcpu, the need_eoi set to number of
> vcpu that received the interrupt. And decrease it when each vcpu writing
> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
> write eoi.
> 
> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> ---
>  virt/kvm/ioapic.c |   67 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 67 insertions(+), 0 deletions(-)
> 
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index 7e47da8..8d498e5 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -130,6 +130,48 @@ static void rtc_irq_get_dest_vcpu(struct kvm_ioapic *ioapic, int irq)
>  	kvm_get_dest_vcpu(ioapic->kvm, &irqe, ioapic->rtc_status.vcpu_map);
>  }
>  
> +static void rtc_irq_set_eoi(struct kvm_ioapic *ioapic, int irq)
> +{
> +	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
> +
> +	if (irq != 8)
> +		return;
> +
> +	if (likely(!bitmap_empty(ioapic->rtc_status.vcpu_map, KVM_MAX_VCPUS))) {
> +		if (entry->fields.delivery_mode == APIC_DM_LOWEST)
> +			ioapic->rtc_status.need_eoi = 1;
> +		else {
> +			int weight;
> +			weight = bitmap_weight(ioapic->rtc_status.vcpu_map,
> +					sizeof(ioapic->rtc_status.vcpu_map));
> +			ioapic->rtc_status.need_eoi = weight;
> +		}
> +	}
> +}

Why are two bitmaps necessary? One should be enough.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z March 20, 2013, 2:47 a.m. UTC | #4
Marcelo Tosatti wrote on 2013-03-20:
> On Mon, Mar 18, 2013 at 03:24:39PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <yang.z.zhang@Intel.com>
>> 
>> Current interrupt coalescing logci which only used by RTC has conflict
>> with Posted Interrupt.
>> This patch introduces a new mechinism to use eoi to track interrupt:
>> When delivering an interrupt to vcpu, the need_eoi set to number of
>> vcpu that received the interrupt. And decrease it when each vcpu writing
>> eoi. No subsequent RTC interrupt can deliver to vcpu until all vcpus
>> write eoi.
>> 
>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>> ---
>>  virt/kvm/ioapic.c |   67
>>  +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed,
>>  67 insertions(+), 0 deletions(-)
>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>> index 7e47da8..8d498e5 100644
>> --- a/virt/kvm/ioapic.c
>> +++ b/virt/kvm/ioapic.c
>> @@ -130,6 +130,48 @@ static void rtc_irq_get_dest_vcpu(struct kvm_ioapic
> *ioapic, int irq)
>>  	kvm_get_dest_vcpu(ioapic->kvm, &irqe, ioapic->rtc_status.vcpu_map);
>>  }
>> +static void rtc_irq_set_eoi(struct kvm_ioapic *ioapic, int irq) +{
>> +	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; +
>> +	if (irq != 8) +		return; + +	if
>> (likely(!bitmap_empty(ioapic->rtc_status.vcpu_map, KVM_MAX_VCPUS))) {
>> +		if (entry->fields.delivery_mode == APIC_DM_LOWEST)
>> +			ioapic->rtc_status.need_eoi = 1; +		else { +			int weight;
>> +			weight = bitmap_weight(ioapic->rtc_status.vcpu_map,
>> +					sizeof(ioapic->rtc_status.vcpu_map));
>> +			ioapic->rtc_status.need_eoi = weight; +		} +	} +}
> 
> Why two bitmaps are necessary? One should be enough.
On EOI, the bits in the bitmap are cleared. So we need two bitmaps: one that is updated only when the RTC destination vcpu changes, and one that is a copy of it, used for the EOI check.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 7e47da8..8d498e5 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -130,6 +130,48 @@  static void rtc_irq_get_dest_vcpu(struct kvm_ioapic *ioapic, int irq)
 	kvm_get_dest_vcpu(ioapic->kvm, &irqe, ioapic->rtc_status.vcpu_map);
 }
 
+static void rtc_irq_set_eoi(struct kvm_ioapic *ioapic, int irq)
+{
+	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
+
+	if (irq != 8)
+		return;
+
+	if (likely(!bitmap_empty(ioapic->rtc_status.vcpu_map, KVM_MAX_VCPUS))) {
+		if (entry->fields.delivery_mode == APIC_DM_LOWEST)
+			ioapic->rtc_status.need_eoi = 1;
+		else {
+			int weight;
+			weight = bitmap_weight(ioapic->rtc_status.vcpu_map,
+					sizeof(ioapic->rtc_status.vcpu_map));
+			ioapic->rtc_status.need_eoi = weight;
+		}
+	}
+}
+
+static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
+			struct rtc_status *rtc_status, int irq)
+{
+	if (irq != 8)
+		return;
+
+	if (test_bit(vcpu->vcpu_id, rtc_status->vcpu_map))
+		--rtc_status->need_eoi;
+
+	WARN_ON(rtc_status->need_eoi < 0);
+}
+
+static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
+{
+	if (irq != 8)
+		return false;
+
+	if (ioapic->rtc_status.need_eoi > 0)
+		return true; /* coalesced */
+
+	return false;
+}
+
 #else
 
 static void rtc_irq_reset(struct kvm_ioapic *ioapic)
@@ -146,6 +188,22 @@  static void rtc_irq_get_dest_vcpu(struct kvm_ioapic *ioapic, int irq)
 {
 	return;
 }
+
+static void rtc_irq_set_eoi(struct kvm_ioapic *ioapic, int irq)
+{
+	return;
+}
+
+static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
+			struct rtc_status *rtc_status, int irq)
+{
+	return;
+}
+
+static bool rtc_irq_check(struct kvm_ioapic *ioapic, int irq)
+{
+	return false;
+}
 #endif
 
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
@@ -282,6 +340,8 @@  static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	irqe.level = 1;
 	irqe.shorthand = 0;
 
+	rtc_irq_set_eoi(ioapic, irq);
+
 	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
 }
 
@@ -306,6 +366,11 @@  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		ret = 1;
 	} else {
 		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+		if (rtc_irq_check(ioapic, irq)) {
+			ret = 0; /* coalesced */
+			goto out;
+		}
 		ioapic->irr |= mask;
 		if ((edge && old_irr != ioapic->irr) ||
 		    (!edge && !entry.fields.remote_irr))
@@ -313,6 +378,7 @@  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		else
 			ret = 0; /* report coalesced interrupt */
 	}
+out:
 	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	spin_unlock(&ioapic->lock);
 
@@ -340,6 +406,7 @@  static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 		if (ent->fields.vector != vector)
 			continue;
 
+		rtc_irq_ack_eoi(vcpu, &ioapic->rtc_status, i);
 		/*
 		 * We are dropping lock while calling ack notifiers because ack
 		 * notifier callbacks for assigned devices call into IOAPIC