Message ID | 1447037208-75615-1-git-send-email-feng.wu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Paolo, Any comments about this patch, thanks in advance! Thanks, Feng > -----Original Message----- > From: Wu, Feng > Sent: Monday, November 9, 2015 10:47 AM > To: pbonzini@redhat.com > Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org; Wu, Feng > <feng.wu@intel.com> > Subject: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > interrupts > > Use vector-hashing to handle lowest-priority interrupts for > posted-interrupts. As an example, modern Intel CPUs use this > method to handle lowest-priority interrupts. > > Signed-off-by: Feng Wu <feng.wu@intel.com> > --- > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/kvm/irq_comm.c | 52 > +++++++++++++++++++++++++++++++++++++ > arch/x86/kvm/lapic.c | 57 > +++++++++++++++++++++++++++++++++++++++++ > arch/x86/kvm/lapic.h | 2 ++ > arch/x86/kvm/vmx.c | 14 ++++++++-- > 5 files changed, 125 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h > b/arch/x86/include/asm/kvm_host.h > index 9265196..e225106 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1258,6 +1258,8 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); > > bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, > struct kvm_vcpu **dest_vcpu); > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, > + struct kvm_lapic_irq *irq); > > void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, > struct kvm_lapic_irq *irq); > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > index 84b96d3..8156e45 100644 > --- a/arch/x86/kvm/irq_comm.c > +++ b/arch/x86/kvm/irq_comm.c > @@ -266,6 +266,58 @@ out: > return r; > } > > +/* > + * This routine handles lowest-priority interrupts using vector-hashing > + * mechanism. As an example, modern Intel CPUs use this method to > handle > + * lowest-priority interrupts. > + * > + * Here is the details about the vector-hashing mechanism: > + * 1. For lowest-priority interrupts, store all the possible destination > + * vCPUs in an array. > + * 2. Use "guest vector % max number of destination vCPUs" to find the > right > + * destination vCPU in the array for the lowest-priority interrupt. > + */ > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, > + struct kvm_lapic_irq *irq) > + > +{ > + unsigned long > dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; > + unsigned int dest_vcpus = 0; > + struct kvm_vcpu *vcpu; > + unsigned int i, mod, idx = 0; > + > + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq); > + if (vcpu) > + return vcpu; > + > + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); > + > + kvm_for_each_vcpu(i, vcpu, kvm) { > + if (!kvm_apic_present(vcpu)) > + continue; > + > + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, > + irq->dest_id, irq->dest_mode)) > + continue; > + > + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap); > + dest_vcpus++; > + } > + > + if (dest_vcpus == 0) > + return NULL; > + > + mod = irq->vector % dest_vcpus; > + > + for (i = 0; i <= mod; i++) { > + idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, > idx) + 1; > + BUG_ON(idx >= KVM_MAX_VCPUS); > + } > + > + return kvm_get_vcpu(kvm, idx - 1); > +} > +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest); > + > bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, > struct kvm_vcpu **dest_vcpu) > { > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index ecd4ea1..4937aa4 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -816,6 +816,63 @@ out: > return ret; > } > > +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm, > + struct kvm_lapic_irq *irq) > +{ > + struct kvm_apic_map *map; > + struct kvm_vcpu *vcpu = NULL; > + > + if (irq->shorthand) > + return NULL; > + > + rcu_read_lock(); > + map = rcu_dereference(kvm->arch.apic_map); > + > + if (!map) > + goto out; > + > + if ((irq->dest_mode != APIC_DEST_PHYSICAL) && > + kvm_lowest_prio_delivery(irq)) { > + u16 cid; > + int i, idx = 0; > + unsigned long bitmap = 1; > + unsigned int mod, dest_vcpus = 0; > + struct kvm_lapic **dst = NULL; > + > + > + if (!kvm_apic_logical_map_valid(map)) > + goto out; > + > + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); > + > + if (cid >= ARRAY_SIZE(map->logical_map)) > + goto out; > + > + dst = map->logical_map[cid]; > + > + for_each_set_bit(i, &bitmap, 16) { > + if (!dst[i]) > + continue; > + > + dest_vcpus++; > + } > + > + mod = irq->vector % dest_vcpus; > + > + for (i = 0; i <= mod; i++) { > + idx = find_next_bit(&bitmap, KVM_MAX_VCPUS, idx) > + 1; > + BUG_ON(idx >= KVM_MAX_VCPUS); > + } > + > + if (kvm_apic_present(dst[idx-1]->vcpu)) > + vcpu = dst[idx-1]->vcpu; > + } > + > +out: > + rcu_read_unlock(); > + return vcpu; > +} > + > /* > * Add a pending IRQ into lapic. > * Return 1 if successfully added and 0 if discarded. > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > index fde8e35d..a6a775d 100644 > --- a/arch/x86/kvm/lapic.h > +++ b/arch/x86/kvm/lapic.h > @@ -170,4 +170,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu); > > bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq > *irq, > struct kvm_vcpu **dest_vcpu); > +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm, > + struct kvm_lapic_irq *irq); > #endif > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 5eb56ed..57f71ee 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -159,6 +159,9 @@ static int ple_window_actual_max = > KVM_VMX_DEFAULT_PLE_WINDOW_MAX; > static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; > module_param(ple_window_max, int, S_IRUGO); > > +static bool __read_mostly enable_pi_vector_hashing = 1; > +module_param(enable_pi_vector_hashing, bool, S_IRUGO); > + > extern const ulong vmx_return; > > #define NR_AUTOLOAD_MSRS 8 > @@ -10702,8 +10705,15 @@ static int vmx_update_pi_irte(struct kvm *kvm, > unsigned int host_irq, > */ > > kvm_set_msi_irq(e, &irq); > - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) > - continue; > + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { > + if ((!enable_pi_vector_hashing || > + irq.delivery_mode != APIC_DM_LOWEST)) > + continue; > + > + vcpu = kvm_intr_vector_hashing_dest(kvm, &irq); > + if (!vcpu) > + continue; > + } > > vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); > vcpu_info.vector = irq.vector; > -- > 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2015-11-09 10:46+0800, Feng Wu: > Use vector-hashing to handle lowest-priority interrupts for > posted-interrupts. As an example, modern Intel CPUs use this > method to handle lowest-priority interrupts. (I don't think it's a good idea that the algorithm differs from non-PI lowest priority delivery. I'd make them both vector-hashing, which would be "fun" to explain to people expecting round robin ...) > Signed-off-by: Feng Wu <feng.wu@intel.com> > --- > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > +/* > + * This routine handles lowest-priority interrupts using vector-hashing > + * mechanism. As an example, modern Intel CPUs use this method to handle > + * lowest-priority interrupts. > + * > + * Here is the details about the vector-hashing mechanism: > + * 1. For lowest-priority interrupts, store all the possible destination > + * vCPUs in an array. > + * 2. Use "guest vector % max number of destination vCPUs" to find the right > + * destination vCPU in the array for the lowest-priority interrupt. > + */ (Is Skylake i7-6700 a modern Intel CPU? I didn't manage to get hashing ... all interrupts always went to the lowest APIC ID in the set :/ Is there a simple way to verify the algorithm?) > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, > + struct kvm_lapic_irq *irq) > + > +{ > + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; > + unsigned int dest_vcpus = 0; > + struct kvm_vcpu *vcpu; > + unsigned int i, mod, idx = 0; > + > + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq); > + if (vcpu) > + return vcpu; I think the rest of this function shouldn't be implemented: - Shorthands are only for IPIs and hence don't need to be handled, - Lowest priority physical broadcast is not supported, - Lowest priority cluster logical broadcast is not supported, - No point in optimizing mixed xAPIC and x2APIC mode, - The rest is handled by kvm_intr_vector_hashing_dest_fast(). (Even lowest priority flat logical "broadcast".) - We do the work twice when vcpu == NULL means that there is no matching destination. Is there a valid case that can be resolved by going through all vcpus? > + > + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); > + > + kvm_for_each_vcpu(i, vcpu, kvm) { > + if (!kvm_apic_present(vcpu)) > + continue; > + > + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, > + irq->dest_id, irq->dest_mode)) > + continue; > + > + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap); > + dest_vcpus++; > + } > + > + if (dest_vcpus == 0) > + return NULL; > + > + mod = irq->vector % dest_vcpus; > + > + for (i = 0; i <= mod; i++) { > + idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, idx) + 1; > + BUG_ON(idx >= KVM_MAX_VCPUS); > + } > + > + return kvm_get_vcpu(kvm, idx - 1); > +} > +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest); > + > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > @@ -816,6 +816,63 @@ out: > +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm, > + struct kvm_lapic_irq *irq) We now have three very similar functions :( kvm_irq_delivery_to_apic_fast kvm_intr_is_single_vcpu_fast kvm_intr_vector_hashing_dest_fast By utilizing the gcc optimizer, they can be merged without introducing many instructions to the hot path, kvm_irq_delivery_to_apic_fast. (I would eventually do it, so you can save time by ignoring this.) Thanks. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 16/11/2015 20:03, Radim Kr?má? wrote: > 2015-11-09 10:46+0800, Feng Wu: >> Use vector-hashing to handle lowest-priority interrupts for >> posted-interrupts. As an example, modern Intel CPUs use this >> method to handle lowest-priority interrupts. > > (I don't think it's a good idea that the algorithm differs from non-PI > lowest priority delivery. I'd make them both vector-hashing, which > would be "fun" to explain to people expecting round robin ...) Yup, I would make it a module option. Thanks very much Radim for helping with the review. Paolo >> Signed-off-by: Feng Wu <feng.wu@intel.com> >> --- >> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c >> +/* >> + * This routine handles lowest-priority interrupts using vector-hashing >> + * mechanism. As an example, modern Intel CPUs use this method to handle >> + * lowest-priority interrupts. >> + * >> + * Here is the details about the vector-hashing mechanism: >> + * 1. For lowest-priority interrupts, store all the possible destination >> + * vCPUs in an array. >> + * 2. Use "guest vector % max number of destination vCPUs" to find the right >> + * destination vCPU in the array for the lowest-priority interrupt. >> + */ > > (Is Skylake i7-6700 a modern Intel CPU? > I didn't manage to get hashing ... all interrupts always went to the > lowest APIC ID in the set :/ > Is there a simple way to verify the algorithm?) > >> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, >> + struct kvm_lapic_irq *irq) >> + >> +{ >> + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; >> + unsigned int dest_vcpus = 0; >> + struct kvm_vcpu *vcpu; >> + unsigned int i, mod, idx = 0; >> + >> + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq); >> + if (vcpu) >> + return vcpu; > > I think the rest of this function shouldn't be implemented: > - Shorthands are only for IPIs and hence don't need to be handled, > - Lowest priority physical broadcast is not supported, > - Lowest priority cluster logical broadcast is not supported, > - No point in optimizing mixed xAPIC and x2APIC mode, > - The rest is handled by kvm_intr_vector_hashing_dest_fast(). > (Even lowest priority flat logical "broadcast".) > - We do the work twice when vcpu == NULL means that there is no > matching destination. > > Is there a valid case that can be resolved by going through all vcpus? > >> + >> + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); >> + >> + kvm_for_each_vcpu(i, vcpu, kvm) { >> + if (!kvm_apic_present(vcpu)) >> + continue; >> + >> + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, >> + irq->dest_id, irq->dest_mode)) >> + continue; >> + >> + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap); >> + dest_vcpus++; >> + } >> + >> + if (dest_vcpus == 0) >> + return NULL; >> + >> + mod = irq->vector % dest_vcpus; >> + >> + for (i = 0; i <= mod; i++) { >> + idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, idx) + 1; >> + BUG_ON(idx >= KVM_MAX_VCPUS); >> + } >> + >> + return kvm_get_vcpu(kvm, idx - 1); >> +} >> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest); >> + >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c >> @@ -816,6 +816,63 @@ out: >> +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm, >> + struct kvm_lapic_irq *irq) > > We now have three very similar functions :( > > kvm_irq_delivery_to_apic_fast > kvm_intr_is_single_vcpu_fast > kvm_intr_vector_hashing_dest_fast > > By utilizing the gcc optimizer, they can be merged without introducing > many instructions to the hot path, kvm_irq_delivery_to_apic_fast. > (I would eventually do it, so you can save time by ignoring this.) > > Thanks. > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Radim Kr?má? [mailto:rkrcmar@redhat.com] > Sent: Tuesday, November 17, 2015 3:03 AM > To: Wu, Feng <feng.wu@intel.com> > Cc: pbonzini@redhat.com; kvm@vger.kernel.org; linux- > kernel@vger.kernel.org > Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > interrupts > > 2015-11-09 10:46+0800, Feng Wu: > > Use vector-hashing to handle lowest-priority interrupts for > > posted-interrupts. As an example, modern Intel CPUs use this > > method to handle lowest-priority interrupts. > > (I don't think it's a good idea that the algorithm differs from non-PI > lowest priority delivery. I'd make them both vector-hashing, which > would be "fun" to explain to people expecting round robin ...) > > > Signed-off-by: Feng Wu <feng.wu@intel.com> > > --- > > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > > +/* > > + * This routine handles lowest-priority interrupts using vector-hashing > > + * mechanism. As an example, modern Intel CPUs use this method to > handle > > + * lowest-priority interrupts. > > + * > > + * Here is the details about the vector-hashing mechanism: > > + * 1. For lowest-priority interrupts, store all the possible destination > > + * vCPUs in an array. > > + * 2. Use "guest vector % max number of destination vCPUs" to find the > right > > + * destination vCPU in the array for the lowest-priority interrupt. > > + */ > > (Is Skylake i7-6700 a modern Intel CPU? > I didn't manage to get hashing ... all interrupts always went to the > lowest APIC ID in the set :/ > Is there a simple way to verify the algorithm?) Sorry for the late response, I try to get more information about vector hashing before getting back to you. Here is the response from our hardware architect: "I don't think we do any vector hashing on our client parts. This may be why the customer is not able to detect this on Skylake client silicon. The vector hashing is micro-architectural and something we had done on server parts. If you look at the haswell server CPU spec (https://www-ssl.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf) In section 4.1.2, you will see an IntControl register (this is a register controlled/configured by BIOS) - see below. If you look at bits 6:4 in that register, you see the option we offer in hardware for what kind of redirection is applied to lowest priority interrupts. There are three options: 1. Fixed priority 2. Redirect last 3. Hash Vector If picking vector hash, then bits 10:8 specifies the APIC-ID bits used for the hashing." Thanks, Feng > > > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, > > + struct kvm_lapic_irq *irq) > > + > > +{ > > + unsigned long > dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; > > + unsigned int dest_vcpus = 0; > > + struct kvm_vcpu *vcpu; > > + unsigned int i, mod, idx = 0; > > + > > + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq); > > + if (vcpu) > > + return vcpu; > > I think the rest of this function shouldn't be implemented: > - Shorthands are only for IPIs and hence don't need to be handled, > - Lowest priority physical broadcast is not supported, > - Lowest priority cluster logical broadcast is not supported, > - No point in optimizing mixed xAPIC and x2APIC mode, > - The rest is handled by kvm_intr_vector_hashing_dest_fast(). > (Even lowest priority flat logical "broadcast".) > - We do the work twice when vcpu == NULL means that there is no > matching destination. > > Is there a valid case that can be resolved by going through all vcpus? > > > + > > + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); > > + > > + kvm_for_each_vcpu(i, vcpu, kvm) { > > + if (!kvm_apic_present(vcpu)) > > + continue; > > + > > + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, > > + irq->dest_id, irq->dest_mode)) > > + continue; > > + > > + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap); > > + dest_vcpus++; > > + } > > + > > + if (dest_vcpus == 0) > > + return NULL; > > + > > + mod = irq->vector % dest_vcpus; > > + > > + for (i = 0; i <= mod; i++) { > > + idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, > idx) + 1; > > + BUG_ON(idx >= KVM_MAX_VCPUS); > > + } > > + > > + return kvm_get_vcpu(kvm, idx - 1); > > +} > > +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest); > > + > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > @@ -816,6 +816,63 @@ out: > > +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm, > > + struct kvm_lapic_irq *irq) > > We now have three very similar functions :( > > kvm_irq_delivery_to_apic_fast > kvm_intr_is_single_vcpu_fast > kvm_intr_vector_hashing_dest_fast > > By utilizing the gcc optimizer, they can be merged without introducing > many instructions to the hot path, kvm_irq_delivery_to_apic_fast. > (I would eventually do it, so you can save time by ignoring this.) > > Thanks. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Paolo Bonzini [mailto:pbonzini@redhat.com] > Sent: Tuesday, November 17, 2015 5:41 PM > To: Radim Kr?má? <rkrcmar@redhat.com>; Wu, Feng <feng.wu@intel.com> > Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org > Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > interrupts > > > > On 16/11/2015 20:03, Radim Kr?má? wrote: > > 2015-11-09 10:46+0800, Feng Wu: > >> Use vector-hashing to handle lowest-priority interrupts for > >> posted-interrupts. As an example, modern Intel CPUs use this > >> method to handle lowest-priority interrupts. > > > > (I don't think it's a good idea that the algorithm differs from non-PI > > lowest priority delivery. I'd make them both vector-hashing, which > > would be "fun" to explain to people expecting round robin ...) > > Yup, I would make it a module option. Thanks very much Radim for > helping with the review. Thanks for your guys' review. Yes, we can introduce a module option for it. According to Radim's comments above, we need use the same policy for PI and non-PI lowest-priority interrupts, so here is the question: for vector hashing, it is easy to apply it for both non-PI and PI case, however, for Round-Robin, in non-PI case, the round robin counter is used and updated when the interrupt is injected to guest, but for PI case, the interrupt is injected to guest totally by hardware, software cannot control it while interrupt delivery, we can only decide the destination vCPU for the PI interrupt in the initial configuration time (guest update vMSI -> QEMU -> KVM). Do you guys have any good suggestion to do round robin for PI lowest-priority? Seems Round robin is not a good way for PI lowest-priority interrupts. Any comments are appreciated! Thanks, Feng > > Paolo > > >> Signed-off-by: Feng Wu <feng.wu@intel.com> > >> --- > >> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > >> +/* > >> + * This routine handles lowest-priority interrupts using vector-hashing > >> + * mechanism. As an example, modern Intel CPUs use this method to > handle > >> + * lowest-priority interrupts. > >> + * > >> + * Here is the details about the vector-hashing mechanism: > >> + * 1. For lowest-priority interrupts, store all the possible destination > >> + * vCPUs in an array. > >> + * 2. Use "guest vector % max number of destination vCPUs" to find the > right > >> + * destination vCPU in the array for the lowest-priority interrupt. > >> + */ > > > > (Is Skylake i7-6700 a modern Intel CPU? > > I didn't manage to get hashing ... all interrupts always went to the > > lowest APIC ID in the set :/ > > Is there a simple way to verify the algorithm?) > > > >> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, > >> + struct kvm_lapic_irq *irq) > >> + > >> +{ > >> + unsigned long > dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; > >> + unsigned int dest_vcpus = 0; > >> + struct kvm_vcpu *vcpu; > >> + unsigned int i, mod, idx = 0; > >> + > >> + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq); > >> + if (vcpu) > >> + return vcpu; > > > > I think the rest of this function shouldn't be implemented: > > - Shorthands are only for IPIs and hence don't need to be handled, > > - Lowest priority physical broadcast is not supported, > > - Lowest priority cluster logical broadcast is not supported, > > - No point in optimizing mixed xAPIC and x2APIC mode, > > - The rest is handled by kvm_intr_vector_hashing_dest_fast(). > > (Even lowest priority flat logical "broadcast".) > > - We do the work twice when vcpu == NULL means that there is no > > matching destination. > > > > Is there a valid case that can be resolved by going through all vcpus? > > > >> + > >> + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); > >> + > >> + kvm_for_each_vcpu(i, vcpu, kvm) { > >> + if (!kvm_apic_present(vcpu)) > >> + continue; > >> + > >> + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, > >> + irq->dest_id, irq->dest_mode)) > >> + continue; > >> + > >> + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap); > >> + dest_vcpus++; > >> + } > >> + > >> + if (dest_vcpus == 0) > >> + return NULL; > >> + > >> + mod = irq->vector % dest_vcpus; > >> + > >> + for (i = 0; i <= mod; i++) { > >> + idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, > idx) + 1; > >> + BUG_ON(idx >= KVM_MAX_VCPUS); > >> + } > >> + > >> + return kvm_get_vcpu(kvm, idx - 1); > >> +} > >> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest); > >> + > >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > >> @@ -816,6 +816,63 @@ out: > >> +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm, > >> + struct kvm_lapic_irq *irq) > > > > We now have three very similar functions :( > > > > kvm_irq_delivery_to_apic_fast > > kvm_intr_is_single_vcpu_fast > > kvm_intr_vector_hashing_dest_fast > > > > By utilizing the gcc optimizer, they can be merged without introducing > > many instructions to the hot path, kvm_irq_delivery_to_apic_fast. > > (I would eventually do it, so you can save time by ignoring this.) > > > > Thanks. > >
2015-11-24 01:26+0000, Wu, Feng: > "I don't think we do any vector hashing on our client parts. This may be why the customer is not able to detect this on Skylake client silicon. > The vector hashing is micro-architectural and something we had done on server parts. > > If you look at the haswell server CPU spec (https://www-ssl.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf) > In section 4.1.2, you will see an IntControl register (this is a register controlled/configured by BIOS) - see below. Thank you! > If you look at bits 6:4 in that register, you see the option we offer in hardware for what kind of redirection is applied to lowest priority interrupts. > There are three options: > 1. Fixed priority > 2. Redirect last > 3. Hash Vector > > If picking vector hash, then bits 10:8 specifies the APIC-ID bits used for the hashing." The hash function just interprets a subset of vector's bits as a number and uses that as a starting offset in a search for an enabled APIC within the destination set? For example: The x2APIC destination is 0x00000055 (= first four even APICs in cluster 0), the vector is 0b11100000, and bits 10:8 of IntControl are 000. 000 means that bits 7:4 of vector are selected, thus the vector hash is 0b1110 = 14, so the round-robin effectively does 14 % 4 (because we only have 4 destinations) and delivers to the 3rd possible APIC (= ID 6)? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2015-11-24 01:26+0000, Wu, Feng: >> From: Paolo Bonzini [mailto:pbonzini@redhat.com] >> On 16/11/2015 20:03, Radim Kr?má? wrote: >> > 2015-11-09 10:46+0800, Feng Wu: >> >> Use vector-hashing to handle lowest-priority interrupts for >> >> posted-interrupts. As an example, modern Intel CPUs use this >> >> method to handle lowest-priority interrupts. >> > >> > (I don't think it's a good idea that the algorithm differs from non-PI >> > lowest priority delivery. I'd make them both vector-hashing, which >> > would be "fun" to explain to people expecting round robin ...) >> >> Yup, I would make it a module option. Thanks very much Radim for >> helping with the review. > > Thanks for your guys' review. Yes, we can introduce a module option > for it. According to Radim's comments above, we need use the > same policy for PI and non-PI lowest-priority interrupts, so here is the > question: for vector hashing, it is easy to apply it for both non-PI and PI > case, however, for Round-Robin, in non-PI case, the round robin counter > is used and updated when the interrupt is injected to guest, but for > PI case, the interrupt is injected to guest totally by hardware, software > cannot control it while interrupt delivery, we can only decide the > destination vCPU for the PI interrupt in the initial configuration > time (guest update vMSI -> QEMU -> KVM). Do you guys have any good > suggestion to do round robin for PI lowest-priority? Seems Round robin > is not a good way for PI lowest-priority interrupts. Any comments > are appreciated! It's meaningless to try dynamic algorithms with PI so if we allow both lowest priority algorithms, I'd let PI handle any lowest priority only with vector hashing. (It's an ugly compromise.) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 24/11/2015 15:35, Radim Krcmár wrote: > > Thanks for your guys' review. Yes, we can introduce a module option > > for it. According to Radim's comments above, we need use the > > same policy for PI and non-PI lowest-priority interrupts, so here is the > > question: for vector hashing, it is easy to apply it for both non-PI and PI > > case, however, for Round-Robin, in non-PI case, the round robin counter > > is used and updated when the interrupt is injected to guest, but for > > PI case, the interrupt is injected to guest totally by hardware, software > > cannot control it while interrupt delivery, we can only decide the > > destination vCPU for the PI interrupt in the initial configuration > > time (guest update vMSI -> QEMU -> KVM). Do you guys have any good > > suggestion to do round robin for PI lowest-priority? Seems Round robin > > is not a good way for PI lowest-priority interrupts. Any comments > > are appreciated! > > It's meaningless to try dynamic algorithms with PI so if we allow both > lowest priority algorithms, I'd let PI handle any lowest priority only > with vector hashing. (It's an ugly compromise.) For now, I would just keep the 4.4 behavior, i.e. disable PI unless there is a single destination || vector hashing is enabled. We can flip the switch later. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2015-11-24 15:31+0100, Radim Kr?má?: > 000 means that bits 7:4 of vector are selected, thus the vector hash is > 0b1110 = 14, so the round-robin effectively does 14 % 4 (because we only > have 4 destinations) and delivers to the 3rd possible APIC (= ID 6)? Ah, 3rd APIC in the set has ID 4, of course :) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Paolo Bonzini [mailto:pbonzini@redhat.com] > Sent: Tuesday, November 24, 2015 10:38 PM > To: Radim Krcmár <rkrcmar@redhat.com>; Wu, Feng <feng.wu@intel.com> > Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org > Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > interrupts > > > > On 24/11/2015 15:35, Radim Krcmár wrote: > > > Thanks for your guys' review. Yes, we can introduce a module option > > > for it. According to Radim's comments above, we need use the > > > same policy for PI and non-PI lowest-priority interrupts, so here is the > > > question: for vector hashing, it is easy to apply it for both non-PI and PI > > > case, however, for Round-Robin, in non-PI case, the round robin counter > > > is used and updated when the interrupt is injected to guest, but for > > > PI case, the interrupt is injected to guest totally by hardware, software > > > cannot control it while interrupt delivery, we can only decide the > > > destination vCPU for the PI interrupt in the initial configuration > > > time (guest update vMSI -> QEMU -> KVM). Do you guys have any good > > > suggestion to do round robin for PI lowest-priority? Seems Round robin > > > is not a good way for PI lowest-priority interrupts. Any comments > > > are appreciated! > > > > It's meaningless to try dynamic algorithms with PI so if we allow both > > lowest priority algorithms, I'd let PI handle any lowest priority only > > with vector hashing. (It's an ugly compromise.) > > For now, I would just keep the 4.4 behavior, i.e. disable PI unless > there is a single destination || vector hashing is enabled. We can flip > the switch later. Okay, let me try to understand this clearly: - We will have a new KVM command line parameter to indicate whether vector hashing is enabled. - If it is not enabled, for PI, we can only support single destination lowest priority interrupts, for non-PI, we continue to use RR. - If it is enabled, for PI and non-PI we use vector hashing for both of them. Is this the case you have in mind? Thanks a lot! Thanks, Feng > > Paolo
> -----Original Message----- > From: Radim Kr?má? [mailto:rkrcmar@redhat.com] > Sent: Tuesday, November 24, 2015 10:32 PM > To: Wu, Feng <feng.wu@intel.com> > Cc: pbonzini@redhat.com; kvm@vger.kernel.org; linux- > kernel@vger.kernel.org > Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > interrupts > > 2015-11-24 01:26+0000, Wu, Feng: > > "I don't think we do any vector hashing on our client parts. This may be > why the customer is not able to detect this on Skylake client silicon. > > The vector hashing is micro-architectural and something we had done on > server parts. > > > > If you look at the haswell server CPU spec (https://www- > ssl.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon- > e5-v3-datasheet-vol-2.pdf) > > In section 4.1.2, you will see an IntControl register (this is a register > controlled/configured by BIOS) - see below. > > Thank you! > > > If you look at bits 6:4 in that register, you see the option we offer in > hardware for what kind of redirection is applied to lowest priority interrupts. > > There are three options: > > 1. Fixed priority > > 2. Redirect last > > 3. Hash Vector > > > > If picking vector hash, then bits 10:8 specifies the APIC-ID bits used for the > hashing." > > The hash function just interprets a subset of vector's bits as a number > and uses that as a starting offset in a search for an enabled APIC > within the destination set? > > For example: > The x2APIC destination is 0x00000055 (= first four even APICs in cluster > 0), the vector is 0b11100000, and bits 10:8 of IntControl are 000. > > 000 means that bits 7:4 of vector are selected, thus the vector hash is > 0b1110 = 14, so the round-robin effectively does 14 % 4 (because we only > have 4 destinations) and delivers to the 3rd possible APIC (= ID 6)? In my current implementation, I don't select a subset of vector's bits as the number, instead, I use the whole vector number. For software emulation p. o. v, do we really need to select a subset of the vector's bits as the base number? What is your opinion? Thanks a lot! Thank, Feng -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 25/11/2015 02:58, Wu, Feng wrote: > Okay, let me try to understand this clearly: > - We will have a new KVM command line parameter to indicate whether > vector hashing is enabled. > - If it is not enabled, for PI, we can only support single destination lowest > priority interrupts, for non-PI, we continue to use RR. > - If it is enabled, for PI and non-PI we use vector hashing for both of them. > > Is this the case you have in mind? Thanks a lot! Yes, thanks! Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2015-11-25 03:21+0000, Wu, Feng: > From: Radim Kr?má? [mailto:rkrcmar@redhat.com] >> The hash function just interprets a subset of vector's bits as a number >> and uses that as a starting offset in a search for an enabled APIC >> within the destination set? >> >> For example: >> The x2APIC destination is 0x00000055 (= first four even APICs in cluster >> 0), the vector is 0b11100000, and bits 10:8 of IntControl are 000. >> >> 000 means that bits 7:4 of vector are selected, thus the vector hash is >> 0b1110 = 14, so the round-robin effectively does 14 % 4 (because we only >> have 4 destinations) and delivers to the 3rd possible APIC (= ID 6)? > > In my current implementation, I don't select a subset of vector's bits as > the number, instead, I use the whole vector number. For software emulation > p. o. v, do we really need to select a subset of the vector's bits as the base > number? What is your opinion? Thanks a lot! I think it's ok to pick any algorithm we like. It's unlikely that software would recognize and take advantage of the hardware algorithm without adding a special treatment for KVM. (I'd vote for the simple pick-first-APIC lowest priority algorithm ... I don't see much point in complicating lowest priority when it doesn't deliver to lowest priority CPU anyway.) I mainly wanted to know what real hardware really does, because there is a lot of alternatives that still fit into the Xeon documentation. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 25/11/2015 15:12, Radim Krcmár wrote: > I think it's ok to pick any algorithm we like. It's unlikely that > software would recognize and take advantage of the hardware algorithm > without adding a special treatment for KVM. > (I'd vote for the simple pick-first-APIC lowest priority algorithm ... > I don't see much point in complicating lowest priority when it doesn't > deliver to lowest priority CPU anyway.) Vector hashing is an improvement for the common case where all vectors are set to all CPUs. Sure you can get an unlucky assignment, but it's still better than pick-first-APIC. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2015-11-25 15:38+0100, Paolo Bonzini: > On 25/11/2015 15:12, Radim Krcmár wrote: >> I think it's ok to pick any algorithm we like. It's unlikely that >> software would recognize and take advantage of the hardware algorithm >> without adding a special treatment for KVM. >> (I'd vote for the simple pick-first-APIC lowest priority algorithm ... >> I don't see much point in complicating lowest priority when it doesn't >> deliver to lowest priority CPU anyway.) > > Vector hashing is an improvement for the common case where all vectors > are set to all CPUs. Sure you can get an unlucky assignment, but it's > still better than pick-first-APIC. Yeah, hashing has a valid use case, but a subtle weighting of drawbacks led me to prefer pick-first-APIC ... (I'd prefer to have simple code in KVM and depend on static IRQ balancing in a guest to handle the distribution. The guest could get the unlucky assignment anyway, so it should be prepared; and hashing just made KVM worse in that case. Guests might also configure physical x(2)APIC, where is no lowest priority. And if the guest doesn't do anything with IRQs, then it might not even care about the impact that our choice has.) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Radim Kr?má? [mailto:rkrcmar@redhat.com] > Sent: Wednesday, November 25, 2015 11:43 PM > To: Paolo Bonzini <pbonzini@redhat.com> > Cc: Wu, Feng <feng.wu@intel.com>; kvm@vger.kernel.org; linux- > kernel@vger.kernel.org > Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > interrupts > > 2015-11-25 15:38+0100, Paolo Bonzini: > > On 25/11/2015 15:12, Radim Krcmár wrote: > >> I think it's ok to pick any algorithm we like. It's unlikely that > >> software would recognize and take advantage of the hardware algorithm > >> without adding a special treatment for KVM. > >> (I'd vote for the simple pick-first-APIC lowest priority algorithm ... > >> I don't see much point in complicating lowest priority when it doesn't > >> deliver to lowest priority CPU anyway.) > > > > Vector hashing is an improvement for the common case where all vectors > > are set to all CPUs. Sure you can get an unlucky assignment, but it's > > still better than pick-first-APIC. > > Yeah, hashing has a valid use case, but a subtle weighting of drawbacks > led me to prefer pick-first-APIC ... Is it possible that pick-first-APIC policy make certain vCPU's irq workload too heavy? > > (I'd prefer to have simple code in KVM and depend on static IRQ balancing > in a guest to handle the distribution. > The guest could get the unlucky assignment anyway, so it should be > prepared; and hashing just made KVM worse in that case. Guests might > also configure physical x(2)APIC, where is no lowest priority. > And if the guest doesn't do anything with IRQs, then it might not even > care about the impact that our choice has.) Do do you guys have an agreement on how to handle this? Or we can implement the vector hashing at the current stage. then we can improve it like Radim mentioned above if it is really needed? Thanks, Feng
2015-11-26 06:24+0000, Wu, Feng: >> From: Radim Kr?má? [mailto:rkrcmar@redhat.com] >> 2015-11-25 15:38+0100, Paolo Bonzini: >>> On 25/11/2015 15:12, Radim Krcmár wrote: >>>> I think it's ok to pick any algorithm we like. It's unlikely that >>>> software would recognize and take advantage of the hardware algorithm >>>> without adding a special treatment for KVM. >>>> (I'd vote for the simple pick-first-APIC lowest priority algorithm ... >>>> I don't see much point in complicating lowest priority when it doesn't >>>> deliver to lowest priority CPU anyway.) >>> >>> Vector hashing is an improvement for the common case where all vectors >>> are set to all CPUs. Sure you can get an unlucky assignment, but it's >>> still better than pick-first-APIC. >> >> Yeah, hashing has a valid use case, but a subtle weighting of drawbacks >> led me to prefer pick-first-APIC ... > > Is it possible that pick-first-APIC policy make certain vCPU's irq workload too > heavy? It is, but vector hashing doesn't eliminate that possibility, just makes it significantly less likely. irqbalanced takes care of proper distribution in Linux guests. I'm not sure what other OS do, but they should have something like that as well. >> (I'd prefer to have simple code in KVM and depend on static IRQ balancing >> in a guest to handle the distribution. >> The guest could get the unlucky assignment anyway, so it should be >> prepared; and hashing just made KVM worse in that case. Guests might >> also configure physical x(2)APIC, where is no lowest priority. >> And if the guest doesn't do anything with IRQs, then it might not even >> care about the impact that our choice has.) > > Do do you guys have an agreement on how to handle this? Or we can implement > the vector hashing at the current stage. then we can improve it like Radim mentioned > above if it is really needed? Vector hashing is definitely an improvement over the current situation; I'll agree with any algorithm if it is reasonably implemented. (v1 fails delivery if chosen APIC is disabled, misuses KVM_MAX_VCPUS as bitmap size, and counting number of bits is better done with hweight16() -- these bugs would hopefully be fixed by having a common functinon :]) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Radim, > -----Original Message----- > From: Radim Kr?má? [mailto:rkrcmar@redhat.com] > Sent: Tuesday, November 17, 2015 3:03 AM > To: Wu, Feng <feng.wu@intel.com> > Cc: pbonzini@redhat.com; kvm@vger.kernel.org; linux-kernel@vger.kernel.org > Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > interrupts > > 2015-11-09 10:46+0800, Feng Wu: > > Use vector-hashing to handle lowest-priority interrupts for > > posted-interrupts. As an example, modern Intel CPUs use this > > method to handle lowest-priority interrupts. > > (I don't think it's a good idea that the algorithm differs from non-PI > lowest priority delivery. I'd make them both vector-hashing, which > would be "fun" to explain to people expecting round robin ...) > > > Signed-off-by: Feng Wu <feng.wu@intel.com> > > --- > > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > > +/* > > + * This routine handles lowest-priority interrupts using vector-hashing > > + * mechanism. As an example, modern Intel CPUs use this method to handle > > + * lowest-priority interrupts. > > + * > > + * Here is the details about the vector-hashing mechanism: > > + * 1. For lowest-priority interrupts, store all the possible destination > > + * vCPUs in an array. > > + * 2. Use "guest vector % max number of destination vCPUs" to find the right > > + * destination vCPU in the array for the lowest-priority interrupt. > > + */ > > (Is Skylake i7-6700 a modern Intel CPU? > I didn't manage to get hashing ... all interrupts always went to the > lowest APIC ID in the set :/ > Is there a simple way to verify the algorithm?) > > > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, > > + struct kvm_lapic_irq *irq) > > + > > +{ > > + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; > > + unsigned int dest_vcpus = 0; > > + struct kvm_vcpu *vcpu; > > + unsigned int i, mod, idx = 0; > > + > > + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq); > > + if (vcpu) > > + return vcpu; > > I think the rest of this function shouldn't be implemented: > - Shorthands are only for IPIs and hence don't need to be handled, > - Lowest priority physical broadcast is not supported, > - Lowest priority cluster logical broadcast is not supported, > - No point in optimizing mixed xAPIC and x2APIC mode, I read your comments again, and don't quite understand why we don't need PI optimization for mixed xAPIC and x2APIC mode. BTW, can we have mixed flat and cluster mode? Thanks, Feng > - The rest is handled by kvm_intr_vector_hashing_dest_fast(). > (Even lowest priority flat logical "broadcast".) > - We do the work twice when vcpu == NULL means that there is no > matching destination. > > Is there a valid case that can be resolved by going through all vcpus? > > > + > > + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); > > + > > + kvm_for_each_vcpu(i, vcpu, kvm) { > > + if (!kvm_apic_present(vcpu)) > > + continue; > > + > > + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, > > + irq->dest_id, irq->dest_mode)) > > + continue; > > + > > + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap); > > + dest_vcpus++; > > + } > > + > > + if (dest_vcpus == 0) > > + return NULL; > > + > > + mod = irq->vector % dest_vcpus; > > + > > + for (i = 0; i <= mod; i++) { > > + idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, idx) + > 1; > > + BUG_ON(idx >= KVM_MAX_VCPUS); > > + } > > + > > + return kvm_get_vcpu(kvm, idx - 1); > > +} > > +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest); > > + > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > @@ -816,6 +816,63 @@ out: > > +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm, > > + struct kvm_lapic_irq *irq) > > We now have three very similar functions :( > > kvm_irq_delivery_to_apic_fast > kvm_intr_is_single_vcpu_fast > kvm_intr_vector_hashing_dest_fast > > By utilizing the gcc optimizer, they can be merged without introducing > many instructions to the hot path, kvm_irq_delivery_to_apic_fast. > (I would eventually do it, so you can save time by ignoring this.) > > Thanks. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2015-12-09 08:19+0000, Wu, Feng: >> -----Original Message----- >> From: Radim Kr?má? [mailto:rkrcmar@redhat.com] >> Sent: Tuesday, November 17, 2015 3:03 AM >> To: Wu, Feng <feng.wu@intel.com> >> Cc: pbonzini@redhat.com; kvm@vger.kernel.org; linux-kernel@vger.kernel.org >> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- >> interrupts >> >> 2015-11-09 10:46+0800, Feng Wu: >> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, >> > + struct kvm_lapic_irq *irq) >> > + >> > +{ >> > + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; >> > + unsigned int dest_vcpus = 0; >> > + struct kvm_vcpu *vcpu; >> > + unsigned int i, mod, idx = 0; >> > + >> > + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq); >> > + if (vcpu) >> > + return vcpu; >> >> I think the rest of this function shouldn't be implemented: >> - Shorthands are only for IPIs and hence don't need to be handled, >> - Lowest priority physical broadcast is not supported, >> - Lowest priority cluster logical broadcast is not supported, >> - No point in optimizing mixed xAPIC and x2APIC mode, > > I read your comments again, and don't quite understand why we > don't need PI optimization for mixed xAPIC and x2APIC mode. There shouldn't be a non-hobbyist operating system that uses mixed mode, so the optimization would practically be dead code as all other cases are handled by kvm_intr_vector_hashing_dest_fast(). I think that having extra code would bring problems in the future -- we need to take care of it when refactoring KVM's APIC and we should also write a unit-test for this otherwise dead path. I don't think that the benefit for guests would ever balance those efforts. (Physical xAPIC+x2APIC mode is still somewhat reasonable and xAPIC CPUs start with LDR=0, which means that operating system doesn't need to utilize mixed mode, as defined by KVM, when switching to x2APIC.) > BTW, can we have mixed flat and cluster mode? Yes, KVM recognizes that mixed mode, but luckily, there are severe limitations. Notes below SDM section 10.6.2.2: All processors that have their APIC software enabled (using the spurious vector enable/disable bit) must have their DFRs (Destination Format Registers) programmed identically. I hope there isn't a human that would use it in good faith. (Only NMI/SMI/INIT/SIPI are delivered in software disabled mode and if the system uses cluster xAPIC, OS should set DFR before LDR, which doesn't trigger mixed mode either.) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Radim Kr?má? [mailto:rkrcmar@redhat.com] > Sent: Wednesday, December 9, 2015 10:54 PM > To: Wu, Feng <feng.wu@intel.com> > Cc: pbonzini@redhat.com; kvm@vger.kernel.org; linux-kernel@vger.kernel.org > Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > interrupts > > 2015-12-09 08:19+0000, Wu, Feng: > >> -----Original Message----- > >> From: Radim Kr?má? [mailto:rkrcmar@redhat.com] > >> Sent: Tuesday, November 17, 2015 3:03 AM > >> To: Wu, Feng <feng.wu@intel.com> > >> Cc: pbonzini@redhat.com; kvm@vger.kernel.org; linux- > kernel@vger.kernel.org > >> Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > >> interrupts > >> > >> 2015-11-09 10:46+0800, Feng Wu: > >> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, > >> > + struct kvm_lapic_irq *irq) > >> > + > >> > +{ > >> > + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; > >> > + unsigned int dest_vcpus = 0; > >> > + struct kvm_vcpu *vcpu; > >> > + unsigned int i, mod, idx = 0; > >> > + > >> > + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq); > >> > + if (vcpu) > >> > + return vcpu; > >> > >> I think the rest of this function shouldn't be implemented: > >> - Shorthands are only for IPIs and hence don't need to be handled, > >> - Lowest priority physical broadcast is not supported, > >> - Lowest priority cluster logical broadcast is not supported, > >> - No point in optimizing mixed xAPIC and x2APIC mode, > > > > I read your comments again, and don't quite understand why we > > don't need PI optimization for mixed xAPIC and x2APIC mode. > > There shouldn't be a non-hobbyist operating system that uses mixed mode, > so the optimization would practically be dead code as all other cases > are handled by kvm_intr_vector_hashing_dest_fast(). Thanks a lot for your elaboration! > > I think that having extra code would bring problems in the future -- we > need to take care of it when refactoring KVM's APIC and we should also > write a unit-test for this otherwise dead path. I don't think that the > benefit for guests would ever balance those efforts. > > (Physical xAPIC+x2APIC mode is still somewhat reasonable and xAPIC CPUs > start with LDR=0, which means that operating system doesn't need to > utilize mixed mode, as defined by KVM, when switching to x2APIC.) I think you mean Physical xAPIC+Physical x2APIC mode, right? For physical mode, we don't use LDR in any case, do we? So in physical mode, we only use the APIC ID, that is why they can be mixed, is my understanding correct? Thanks a lot! > > > BTW, can we have mixed flat and cluster mode? > > Yes, KVM recognizes that mixed mode, but luckily, there are severe > limitations. > > Notes below SDM section 10.6.2.2: > All processors that have their APIC software enabled (using the > spurious vector enable/disable bit) must have their DFRs (Destination > Format Registers) programmed identically. Thanks for pointing this out, good to know it! > > I hope there isn't a human that would use it in good faith. > > (Only NMI/SMI/INIT/SIPI are delivered in software disabled mode and if > the system uses cluster xAPIC, OS should set DFR before LDR, which > doesn't trigger mixed mode either.) Just curious, if the APIC is software disabled and it is in xAPIC mode. OS sets different value for DFR for different APICs, then when OS sets LDR, KVM can trigger mixed flat and cluster mode, right? Thanks, Feng
2015-12-10 01:52+0000, Wu, Feng: >> From: Radim Kr?má? [mailto:rkrcmar@redhat.com] >> (Physical xAPIC+x2APIC mode is still somewhat reasonable and xAPIC CPUs >> start with LDR=0, which means that operating system doesn't need to >> utilize mixed mode, as defined by KVM, when switching to x2APIC.) > > I think you mean Physical xAPIC+Physical x2APIC mode, right? For physical > mode, we don't use LDR in any case, do we? So in physical mode, we only > use the APIC ID, that is why they can be mixed, is my understanding correct? Yes. (Technically, physical and logical addressing is always active in APIC, but xAPIC must have nonzero LDR to accept logical interrupts[1].) If all xAPIC LDRs are zero, KVM doesn't enter a "mixed mode" even if some are xAPIC and some x2APIC [2]. 1: Real LAPICs probably do not accept broadcasts on APICs where LDR=0, KVM LAPICs do, but lowest priority broadcast is not allowed anyway, so PI doesn't care. 2: KVM allows OS-writeable APIC ID, which complicates things and real hardware probably doesn't allow it because of that ... we'd be saner with RO APIC ID, but it's not that bad. (And no major OS does it :]) >> the system uses cluster xAPIC, OS should set DFR before LDR, which >> doesn't trigger mixed mode either.) > > Just curious, if the APIC is software disabled and it is in xAPIC mode. OS sets > different value for DFR for different APICs, then when OS sets LDR, KVM can > trigger mixed flat and cluster mode, right? Exactly. APICs with zeroed LDR are ignored, so KVM will use the slow-path for delivery (= trigger mixed mode) at the moment the first APIC with different DFR is configured. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On > Behalf Of Radim Krcmár > Sent: Friday, December 11, 2015 10:38 PM > To: Wu, Feng <feng.wu@intel.com> > Cc: pbonzini@redhat.com; kvm@vger.kernel.org; linux-kernel@vger.kernel.org > Subject: Re: [PATCH] KVM: x86: Add lowest-priority support for vt-d posted- > interrupts > > 2015-12-10 01:52+0000, Wu, Feng: > >> From: Radim Kr?má? [mailto:rkrcmar@redhat.com] > >> (Physical xAPIC+x2APIC mode is still somewhat reasonable and xAPIC CPUs > >> start with LDR=0, which means that operating system doesn't need to > >> utilize mixed mode, as defined by KVM, when switching to x2APIC.) > > > > I think you mean Physical xAPIC+Physical x2APIC mode, right? For physical > > mode, we don't use LDR in any case, do we? So in physical mode, we only > > use the APIC ID, that is why they can be mixed, is my understanding correct? > > Yes. (Technically, physical and logical addressing is always active in > APIC, but xAPIC must have nonzero LDR to accept logical interrupts[1].) > If all xAPIC LDRs are zero, KVM doesn't enter a "mixed mode" even if > some are xAPIC and some x2APIC [2]. > > 1: Real LAPICs probably do not accept broadcasts on APICs where LDR=0, > KVM LAPICs do, but lowest priority broadcast is not allowed anyway, > so PI doesn't care. > > 2: KVM allows OS-writeable APIC ID, which complicates things and real > hardware probably doesn't allow it because of that ... we'd be saner > with RO APIC ID, but it's not that bad. (And no major OS does it :]) > > >> the system uses cluster xAPIC, OS should set DFR before LDR, which > >> doesn't trigger mixed mode either.) > > > > Just curious, if the APIC is software disabled and it is in xAPIC mode. OS sets > > different value for DFR for different APICs, then when OS sets LDR, KVM can > > trigger mixed flat and cluster mode, right? > > Exactly. > APICs with zeroed LDR are ignored, so KVM will use the slow-path for > delivery (= trigger mixed mode) at the moment the first APIC with > different DFR is configured. Thanks a lot for your explanation! Thanks, Feng > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9265196..e225106 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1258,6 +1258,8 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, + struct kvm_lapic_irq *irq); void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq *irq); diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 84b96d3..8156e45 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -266,6 +266,58 @@ out: return r; } +/* + * This routine handles lowest-priority interrupts using vector-hashing + * mechanism. As an example, modern Intel CPUs use this method to handle + * lowest-priority interrupts. + * + * Here is the details about the vector-hashing mechanism: + * 1. For lowest-priority interrupts, store all the possible destination + * vCPUs in an array. + * 2. Use "guest vector % max number of destination vCPUs" to find the right + * destination vCPU in the array for the lowest-priority interrupt. + */ +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm, + struct kvm_lapic_irq *irq) + +{ + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + unsigned int dest_vcpus = 0; + struct kvm_vcpu *vcpu; + unsigned int i, mod, idx = 0; + + vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq); + if (vcpu) + return vcpu; + + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) + continue; + + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, + irq->dest_id, irq->dest_mode)) + continue; + + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap); + dest_vcpus++; + } + + if (dest_vcpus == 0) + return NULL; + + mod = irq->vector % dest_vcpus; + + for (i = 0; i <= mod; i++) { + idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, idx) + 1; + BUG_ON(idx >= KVM_MAX_VCPUS); + } + + return kvm_get_vcpu(kvm, idx - 1); +} +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest); + bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ecd4ea1..4937aa4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -816,6 +816,63 @@ out: return ret; } +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm, + struct kvm_lapic_irq *irq) +{ + struct kvm_apic_map *map; + struct kvm_vcpu *vcpu = NULL; + + if (irq->shorthand) + return NULL; + + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + + if (!map) + goto out; + + if ((irq->dest_mode != APIC_DEST_PHYSICAL) && + kvm_lowest_prio_delivery(irq)) { + u16 cid; + int i, idx = 0; + unsigned long bitmap = 1; + unsigned int mod, dest_vcpus = 0; + struct kvm_lapic **dst = NULL; + + + if (!kvm_apic_logical_map_valid(map)) + goto out; + + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); + + if (cid >= ARRAY_SIZE(map->logical_map)) + goto out; + + dst = map->logical_map[cid]; + + for_each_set_bit(i, &bitmap, 16) { + if (!dst[i]) + continue; + + dest_vcpus++; + } + + mod = irq->vector % dest_vcpus; + + for (i = 0; i <= mod; i++) { + idx = find_next_bit(&bitmap, KVM_MAX_VCPUS, idx) + 1; + BUG_ON(idx >= KVM_MAX_VCPUS); + } + + if (kvm_apic_present(dst[idx-1]->vcpu)) + vcpu = dst[idx-1]->vcpu; + } + +out: + rcu_read_unlock(); + return vcpu; +} + /* * Add a pending IRQ into lapic. * Return 1 if successfully added and 0 if discarded. diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index fde8e35d..a6a775d 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -170,4 +170,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); +struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm, + struct kvm_lapic_irq *irq); #endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5eb56ed..57f71ee 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -159,6 +159,9 @@ static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; module_param(ple_window_max, int, S_IRUGO); +static bool __read_mostly enable_pi_vector_hashing = 1; +module_param(enable_pi_vector_hashing, bool, S_IRUGO); + extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 @@ -10702,8 +10705,15 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, */ kvm_set_msi_irq(e, &irq); - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) - continue; + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { + if ((!enable_pi_vector_hashing || + irq.delivery_mode != APIC_DM_LOWEST)) + continue; + + vcpu = kvm_intr_vector_hashing_dest(kvm, &irq); + if (!vcpu) + continue; + } vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector;
Use vector-hashing to handle lowest-priority interrupts for posted-interrupts. As an example, modern Intel CPUs use this method to handle lowest-priority interrupts. Signed-off-by: Feng Wu <feng.wu@intel.com> --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/irq_comm.c | 52 +++++++++++++++++++++++++++++++++++++ arch/x86/kvm/lapic.c | 57 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/lapic.h | 2 ++ arch/x86/kvm/vmx.c | 14 ++++++++-- 5 files changed, 125 insertions(+), 2 deletions(-)