Message ID | 1502192234-14068-1-git-send-email-longpeng2@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 08/08/2017 13:37, Longpeng(Mike) wrote: > Currently 'apic_arb_prio' is int32_t, it's too short for long > time running. In our environment, it overflowed and then the > UBSAN was angry: > > signed integer overflow: > 2147483647 + 1 cannot be represented in type 'int' > CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ... > ... > Call Trace: > [<ffffffff81f030b6>] dump_stack+0x1e/0x20 > [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55 > [<ffffffff81f04658>] handle_overflow+0x1ba/0x215 > [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31 > [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm] > [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm] > [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm] > [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm] > [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm] > [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm] > [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm] > ... > > We expand it to u64, this is large enough. Suppose the vcpu receives > 1000 irqs per second, then it won't overflow in 584942417 years. > ( 18446744073709551615/1000/3600/24/365 = 584942417 ) Since you only look at the difference, changing it to uint32_t should be enough. 
Paolo > Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com> > --- > arch/x86/include/asm/kvm_host.h | 2 +- > arch/x86/kvm/ioapic.h | 3 ++- > arch/x86/kvm/irq_comm.c | 2 +- > arch/x86/kvm/lapic.c | 6 +++--- > 4 files changed, 7 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 87ac4fb..ce9a5f5 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -499,7 +499,7 @@ struct kvm_vcpu_arch { > bool apicv_active; > DECLARE_BITMAP(ioapic_handled_vectors, 256); > unsigned long apic_attention; > - int32_t apic_arb_prio; > + u64 apic_arb_prio; > int mp_state; > u64 ia32_misc_enable_msr; > u64 smbase; > diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h > index 29ce197..a26deed 100644 > --- a/arch/x86/kvm/ioapic.h > +++ b/arch/x86/kvm/ioapic.h > @@ -117,7 +117,8 @@ static inline int ioapic_in_kernel(struct kvm *kvm) > void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); > bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, > int short_hand, unsigned int dest, int dest_mode); > -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); > +/* Return true if vcpu1's priority is lower */ > +bool kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); > void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, > int trigger_mode); > int kvm_ioapic_init(struct kvm *kvm); > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > index 3cc3b2d..03b1487 100644 > --- a/arch/x86/kvm/irq_comm.c > +++ b/arch/x86/kvm/irq_comm.c > @@ -90,7 +90,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, > if (!kvm_vector_hashing_enabled()) { > if (!lowest) > lowest = vcpu; > - else if (kvm_apic_compare_prio(vcpu, lowest) < 0) > + else if (kvm_apic_compare_prio(vcpu, lowest)) > lowest = vcpu; > } else { > __set_bit(i, dest_vcpu_bitmap); > diff --git a/arch/x86/kvm/lapic.c 
b/arch/x86/kvm/lapic.c > index 589dcc1..1e2b1f2 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -840,7 +840,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, > if (lowest < 0) > lowest = i; > else if (kvm_apic_compare_prio((*dst)[i]->vcpu, > - (*dst)[lowest]->vcpu) < 0) > + (*dst)[lowest]->vcpu)) > lowest = i; > } > } else { > @@ -1048,9 +1048,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > return result; > } > > -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) > +bool kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) > { > - return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; > + return vcpu1->arch.apic_arb_prio < vcpu2->arch.apic_arb_prio; > } > > static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) >
On 2017/8/8 21:08, Paolo Bonzini wrote: > On 08/08/2017 13:37, Longpeng(Mike) wrote: >> Currently 'apic_arb_prio' is int32_t, it's too short for long >> time running. In our environment, it overflowed and then the >> UBSAN was angry: >> >> signed integer overflow: >> 2147483647 + 1 cannot be represented in type 'int' >> CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ... >> ... >> Call Trace: >> [<ffffffff81f030b6>] dump_stack+0x1e/0x20 >> [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55 >> [<ffffffff81f04658>] handle_overflow+0x1ba/0x215 >> [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31 >> [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm] >> [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm] >> [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm] >> [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm] >> [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm] >> [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm] >> [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm] >> ... >> >> We expand it to u64, this is large enough. Suppose the vcpu receives >> 1000 irqs per second, then it won't overflow in 584942417 years. >> ( 18446744073709551615/1000/3600/24/365 = 584942417 ) > > Since you only look at the difference, changing it to uint32_t should be > enough. Hi Paolo, I'm afraid uint32_t isn't enough. At 1000 irqs per second, it can only last 49 days before overflowing ( although the overflow won't cause any corruption ). 4294967295/1000/3600/24 = 49 > > Paolo > > . >
On 08/08/2017 15:50, Longpeng (Mike) wrote: > > > On 2017/8/8 21:08, Paolo Bonzini wrote: > >> On 08/08/2017 13:37, Longpeng(Mike) wrote: >>> Currently 'apic_arb_prio' is int32_t, it's too short for long >>> time running. In our environment, it overflowed and then the >>> UBSAN was angry: >>> >>> signed integer overflow: >>> 2147483647 + 1 cannot be represented in type 'int' >>> CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ... >>> ... >>> Call Trace: >>> [<ffffffff81f030b6>] dump_stack+0x1e/0x20 >>> [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55 >>> [<ffffffff81f04658>] handle_overflow+0x1ba/0x215 >>> [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31 >>> [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm] >>> [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm] >>> [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm] >>> [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm] >>> [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm] >>> [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm] >>> [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm] >>> ... >>> >>> We expand it to u64, this is large enough. Suppose the vcpu receives >>> 1000 irqs per second, then it won't overflow in 584942417 years. >>> ( 18446744073709551615/1000/3600/24/365 = 584942417 ) >> >> Since you only look at the difference, changing it to uint32_t should be >> enough. > > > Hi Paolo, > > I'm afraid uint32_t isn't enough. For 1000 irqs per second, it can only holds > 49 days ( although the overflow won't cause any corruption ). What matters is only the difference across 2 vCPUs. And in fact even 32 bits are probably too many, 16 or even 8 should be enough because overflowing arb_prio is a good thing. If you have delivered millions of IRQs to VCPU0 (let's say for a day), and then switch the interrupt to VCPU1, you don't want the next day to have interrupts going to VCPU1 only. A short warm-up time (a few seconds?) 
is acceptable, but then you should have interrupts distributed equally between VCPU0 and VCPU1. This can only happen if arb_prio overflows. Paolo > 4294967295/1000/3600/24 = 49 > >> >> Paolo >> > >> . >> > >
On 2017/8/8 21:57, Paolo Bonzini wrote: > On 08/08/2017 15:50, Longpeng (Mike) wrote: >> >> >> On 2017/8/8 21:08, Paolo Bonzini wrote: >> >>> On 08/08/2017 13:37, Longpeng(Mike) wrote: >>>> Currently 'apic_arb_prio' is int32_t, it's too short for long >>>> time running. In our environment, it overflowed and then the >>>> UBSAN was angry: >>>> >>>> signed integer overflow: >>>> 2147483647 + 1 cannot be represented in type 'int' >>>> CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ... >>>> ... >>>> Call Trace: >>>> [<ffffffff81f030b6>] dump_stack+0x1e/0x20 >>>> [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55 >>>> [<ffffffff81f04658>] handle_overflow+0x1ba/0x215 >>>> [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31 >>>> [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm] >>>> [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm] >>>> [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm] >>>> [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm] >>>> [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm] >>>> [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm] >>>> [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm] >>>> ... >>>> >>>> We expand it to u64, this is large enough. Suppose the vcpu receives >>>> 1000 irqs per second, then it won't overflow in 584942417 years. >>>> ( 18446744073709551615/1000/3600/24/365 = 584942417 ) >>> >>> Since you only look at the difference, changing it to uint32_t should be >>> enough. >> >> >> Hi Paolo, >> >> I'm afraid uint32_t isn't enough. For 1000 irqs per second, it can only holds >> 49 days ( although the overflow won't cause any corruption ). > > What matters is only the difference across 2 vCPUs. > > And in fact even 32 bits are probably too many, 16 or even 8 should be > enough because overflowing arb_prio is a good thing. 
If you have > delivered millions IRQs to VCPU0 (let's say for a day), and then switch > the interrupt to VCPU1, you don't want to the next day to have > interrupts going to VCPU1 only. A short warm-up time (a few seconds?) > is acceptable, but then you should have interrupts distributed equally > between VCPU0 and VCPU1. This can only happen if arb_prio overflows. > I understand now, thanks for your patience. :)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 87ac4fb..ce9a5f5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -499,7 +499,7 @@ struct kvm_vcpu_arch { bool apicv_active; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; - int32_t apic_arb_prio; + u64 apic_arb_prio; int mp_state; u64 ia32_misc_enable_msr; u64 smbase; diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 29ce197..a26deed 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -117,7 +117,8 @@ static inline int ioapic_in_kernel(struct kvm *kvm) void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode); -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); +/* Return true if vcpu1's priority is lower */ +bool kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 3cc3b2d..03b1487 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -90,7 +90,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (!kvm_vector_hashing_enabled()) { if (!lowest) lowest = vcpu; - else if (kvm_apic_compare_prio(vcpu, lowest) < 0) + else if (kvm_apic_compare_prio(vcpu, lowest)) lowest = vcpu; } else { __set_bit(i, dest_vcpu_bitmap); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 589dcc1..1e2b1f2 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -840,7 +840,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, if (lowest < 0) lowest = i; else if (kvm_apic_compare_prio((*dst)[i]->vcpu, - (*dst)[lowest]->vcpu) < 0) + (*dst)[lowest]->vcpu)) lowest = i; } } else { @@ -1048,9 
+1048,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, return result; } -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) +bool kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) { - return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; + return vcpu1->arch.apic_arb_prio < vcpu2->arch.apic_arb_prio; } static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
Currently 'apic_arb_prio' is an int32_t, which is too narrow for long-running guests. In our environment, it overflowed and then the UBSAN was angry: signed integer overflow: 2147483647 + 1 cannot be represented in type 'int' CPU: 22 PID: 31237 Comm: qemu-kvm Tainted: ... ... Call Trace: [<ffffffff81f030b6>] dump_stack+0x1e/0x20 [<ffffffff81f03173>] ubsan_epilogue+0x12/0x55 [<ffffffff81f04658>] handle_overflow+0x1ba/0x215 [<ffffffff81f046dd>] __ubsan_handle_add_overflow+0x2a/0x31 [<ffffffffa126cb1a>] __apic_accept_irq+0x57a/0x5d0 [kvm] [<ffffffffa126d14f>] kvm_apic_set_irq+0x9f/0xf0 [kvm] [<ffffffffa126db20>] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm] [<ffffffffa127d8ea>] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm] [<ffffffffa127e039>] kvm_set_msi+0xa9/0x100 [kvm] [<ffffffffa12871ed>] kvm_send_userspace_msi+0x14d/0x1f0 [kvm] [<ffffffffa11ed56e>] kvm_vm_ioctl+0x4ee/0xdd0 [kvm] ... We expand it to u64, which is large enough. Suppose the vcpu receives 1000 irqs per second; then it won't overflow for 584942417 years. ( 18446744073709551615/1000/3600/24/365 = 584942417 ) Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com> --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/ioapic.h | 3 ++- arch/x86/kvm/irq_comm.c | 2 +- arch/x86/kvm/lapic.c | 6 +++--- 4 files changed, 7 insertions(+), 6 deletions(-)