diff mbox

KVM: X86: expand ->arch.apic_arb_prio to u64

Message ID 1502192234-14068-1-git-send-email-longpeng2@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Longpeng(Mike) Aug. 8, 2017, 11:37 a.m. UTC
Currently 'apic_arb_prio' is an int32_t, which is too narrow for
long-running guests. In our environment it overflowed and UBSAN
complained:

signed integer overflow:
2147483647 + 1 cannot be represented in type 'int'
CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ...
...
Call Trace:
 [<ffffffff81f030b6>] dump_stack+0x1e/0x20
 [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55
 [<ffffffff81f04658>] handle_overflow+0x1ba/0x215
 [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31
 [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm]
 [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm]
 [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm]
 [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm]
 [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm]
 [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm]
 [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm]
...

Expand it to u64, which is large enough: if a vcpu receives 1000 irqs
per second, the counter will not overflow for 584942417 years.
( 18446744073709551615/1000/3600/24/365 = 584942417 )

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/ioapic.h           | 3 ++-
 arch/x86/kvm/irq_comm.c         | 2 +-
 arch/x86/kvm/lapic.c            | 6 +++---
 4 files changed, 7 insertions(+), 6 deletions(-)

Comments

Paolo Bonzini Aug. 8, 2017, 1:08 p.m. UTC | #1
On 08/08/2017 13:37, Longpeng(Mike) wrote:
> Currently 'apic_arb_prio' is int32_t, it's too short for long
> time running. In our environment, it overflowed and then the
> UBSAN was angry:
> 
> signed integer overflow:
> 2147483647 + 1 cannot be represented in type 'int'
> CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ...
> ...
> Call Trace:
>  [<ffffffff81f030b6>] dump_stack+0x1e/0x20
>  [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55
>  [<ffffffff81f04658>] handle_overflow+0x1ba/0x215
>  [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31
>  [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm]
>  [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm]
>  [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm]
>  [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm]
>  [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm]
>  [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm]
>  [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm]
> ...
> 
> We expand it to u64, this is large enough. Suppose the vcpu receives
> 1000 irqs per second, then it won't overflow in 584942417 years.
> ( 18446744073709551615/1000/3600/24/365 = 584942417 )

Since you only look at the difference, changing it to uint32_t should be
enough.

Paolo

> Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 2 +-
>  arch/x86/kvm/ioapic.h           | 3 ++-
>  arch/x86/kvm/irq_comm.c         | 2 +-
>  arch/x86/kvm/lapic.c            | 6 +++---
>  4 files changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 87ac4fb..ce9a5f5 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -499,7 +499,7 @@ struct kvm_vcpu_arch {
>  	bool apicv_active;
>  	DECLARE_BITMAP(ioapic_handled_vectors, 256);
>  	unsigned long apic_attention;
> -	int32_t apic_arb_prio;
> +	u64 apic_arb_prio;
>  	int mp_state;
>  	u64 ia32_misc_enable_msr;
>  	u64 smbase;
> diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
> index 29ce197..a26deed 100644
> --- a/arch/x86/kvm/ioapic.h
> +++ b/arch/x86/kvm/ioapic.h
> @@ -117,7 +117,8 @@ static inline int ioapic_in_kernel(struct kvm *kvm)
>  void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
>  bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
>  		int short_hand, unsigned int dest, int dest_mode);
> -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
> +/* Return true if vcpu1's priority is lower */
> +bool kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
>  void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
>  			int trigger_mode);
>  int kvm_ioapic_init(struct kvm *kvm);
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 3cc3b2d..03b1487 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -90,7 +90,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>  			if (!kvm_vector_hashing_enabled()) {
>  				if (!lowest)
>  					lowest = vcpu;
> -				else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> +				else if (kvm_apic_compare_prio(vcpu, lowest))
>  					lowest = vcpu;
>  			} else {
>  				__set_bit(i, dest_vcpu_bitmap);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 589dcc1..1e2b1f2 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -840,7 +840,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
>  			if (lowest < 0)
>  				lowest = i;
>  			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
> -						(*dst)[lowest]->vcpu) < 0)
> +						(*dst)[lowest]->vcpu))
>  				lowest = i;
>  		}
>  	} else {
> @@ -1048,9 +1048,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  	return result;
>  }
>  
> -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
> +bool kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
>  {
> -	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
> +	return vcpu1->arch.apic_arb_prio < vcpu2->arch.apic_arb_prio;
>  }
>  
>  static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
>
Longpeng(Mike) Aug. 8, 2017, 1:50 p.m. UTC | #2
On 2017/8/8 21:08, Paolo Bonzini wrote:

> On 08/08/2017 13:37, Longpeng(Mike) wrote:
>> Currently 'apic_arb_prio' is int32_t, it's too short for long
>> time running. In our environment, it overflowed and then the
>> UBSAN was angry:
>>
>> signed integer overflow:
>> 2147483647 + 1 cannot be represented in type 'int'
>> CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ...
>> ...
>> Call Trace:
>>  [<ffffffff81f030b6>] dump_stack+0x1e/0x20
>>  [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55
>>  [<ffffffff81f04658>] handle_overflow+0x1ba/0x215
>>  [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31
>>  [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm]
>>  [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm]
>>  [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm]
>>  [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm]
>>  [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm]
>>  [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm]
>>  [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm]
>> ...
>>
>> We expand it to u64, this is large enough. Suppose the vcpu receives
>> 1000 irqs per second, then it won't overflow in 584942417 years.
>> ( 18446744073709551615/1000/3600/24/365 = 584942417 )
> 
> Since you only look at the difference, changing it to uint32_t should be
> enough.


Hi Paolo,

I'm afraid uint32_t isn't enough. At 1000 irqs per second, it can only hold
out for 49 days (although the overflow won't cause any corruption).

4294967295/1000/3600/24 = 49

> 
> Paolo
> 

> .
>
Paolo Bonzini Aug. 8, 2017, 1:57 p.m. UTC | #3
On 08/08/2017 15:50, Longpeng (Mike) wrote:
> 
> 
> On 2017/8/8 21:08, Paolo Bonzini wrote:
> 
>> On 08/08/2017 13:37, Longpeng(Mike) wrote:
>>> Currently 'apic_arb_prio' is int32_t, it's too short for long
>>> time running. In our environment, it overflowed and then the
>>> UBSAN was angry:
>>>
>>> signed integer overflow:
>>> 2147483647 + 1 cannot be represented in type 'int'
>>> CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ...
>>> ...
>>> Call Trace:
>>>  [<ffffffff81f030b6>] dump_stack+0x1e/0x20
>>>  [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55
>>>  [<ffffffff81f04658>] handle_overflow+0x1ba/0x215
>>>  [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31
>>>  [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm]
>>>  [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm]
>>>  [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm]
>>>  [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm]
>>>  [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm]
>>>  [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm]
>>>  [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm]
>>> ...
>>>
>>> We expand it to u64, this is large enough. Suppose the vcpu receives
>>> 1000 irqs per second, then it won't overflow in 584942417 years.
>>> ( 18446744073709551615/1000/3600/24/365 = 584942417 )
>>
>> Since you only look at the difference, changing it to uint32_t should be
>> enough.
> 
> 
> Hi Paolo,
> 
> I'm afraid uint32_t isn't enough. For 1000 irqs per second, it can only holds
> 49 days ( although the overflow won't cause any corruption ).

What matters is only the difference across 2 vCPUs.

And in fact even 32 bits are probably too many; 16 or even 8 should be
enough, because overflowing arb_prio is a good thing.  If you have
delivered millions of IRQs to VCPU0 (let's say for a day), and then switch
the interrupt to VCPU1, you don't want the next day to have
interrupts going to VCPU1 only.  A short warm-up time (a few seconds?)
is acceptable, but then you should have interrupts distributed equally
between VCPU0 and VCPU1.  This can only happen if arb_prio overflows.

Paolo

> 4294967295/1000/3600/24 = 49
> 
>>
>> Paolo
>>
> 
>> .
>>
> 
>
Longpeng(Mike) Aug. 9, 2017, 12:59 a.m. UTC | #4
On 2017/8/8 21:57, Paolo Bonzini wrote:

> On 08/08/2017 15:50, Longpeng (Mike) wrote:
>>
>>
>> On 2017/8/8 21:08, Paolo Bonzini wrote:
>>
>>> On 08/08/2017 13:37, Longpeng(Mike) wrote:
>>>> Currently 'apic_arb_prio' is int32_t, it's too short for long
>>>> time running. In our environment, it overflowed and then the
>>>> UBSAN was angry:
>>>>
>>>> signed integer overflow:
>>>> 2147483647 + 1 cannot be represented in type 'int'
>>>> CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ...
>>>> ...
>>>> Call Trace:
>>>>  [<ffffffff81f030b6>] dump_stack+0x1e/0x20
>>>>  [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55
>>>>  [<ffffffff81f04658>] handle_overflow+0x1ba/0x215
>>>>  [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31
>>>>  [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm]
>>>>  [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm]
>>>>  [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm]
>>>>  [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm]
>>>>  [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm]
>>>>  [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm]
>>>>  [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm]
>>>> ...
>>>>
>>>> We expand it to u64, this is large enough. Suppose the vcpu receives
>>>> 1000 irqs per second, then it won't overflow in 584942417 years.
>>>> ( 18446744073709551615/1000/3600/24/365 = 584942417 )
>>>
>>> Since you only look at the difference, changing it to uint32_t should be
>>> enough.
>>
>>
>> Hi Paolo,
>>
>> I'm afraid uint32_t isn't enough. For 1000 irqs per second, it can only holds
>> 49 days ( although the overflow won't cause any corruption ).
> 
> What matters is only the difference across 2 vCPUs.
> 
> And in fact even 32 bits are probably too many, 16 or even 8 should be
> enough because overflowing arb_prio is a good thing.  If you have
> delivered millions IRQs to VCPU0 (let's say for a day), and then switch
> the interrupt to VCPU1, you don't want to the next day to have
> interrupts going to VCPU1 only.  A short warm-up time (a few seconds?)
> is acceptable, but then you should have interrupts distributed equally
> between VCPU0 and VCPU1.  This can only happen if arb_prio overflows.
> 


I understand now, thanks for your patience. :)
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 87ac4fb..ce9a5f5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -499,7 +499,7 @@  struct kvm_vcpu_arch {
 	bool apicv_active;
 	DECLARE_BITMAP(ioapic_handled_vectors, 256);
 	unsigned long apic_attention;
-	int32_t apic_arb_prio;
+	u64 apic_arb_prio;
 	int mp_state;
 	u64 ia32_misc_enable_msr;
 	u64 smbase;
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 29ce197..a26deed 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -117,7 +117,8 @@  static inline int ioapic_in_kernel(struct kvm *kvm)
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, unsigned int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
+/* Return true if vcpu1's priority is lower */
+bool kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
 			int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 3cc3b2d..03b1487 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -90,7 +90,7 @@  int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 			if (!kvm_vector_hashing_enabled()) {
 				if (!lowest)
 					lowest = vcpu;
-				else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+				else if (kvm_apic_compare_prio(vcpu, lowest))
 					lowest = vcpu;
 			} else {
 				__set_bit(i, dest_vcpu_bitmap);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 589dcc1..1e2b1f2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -840,7 +840,7 @@  static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 			if (lowest < 0)
 				lowest = i;
 			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
-						(*dst)[lowest]->vcpu) < 0)
+						(*dst)[lowest]->vcpu))
 				lowest = i;
 		}
 	} else {
@@ -1048,9 +1048,9 @@  static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	return result;
 }
 
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
+bool kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 {
-	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
+	return vcpu1->arch.apic_arb_prio < vcpu2->arch.apic_arb_prio;
 }
 
 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)