@@ -862,6 +862,9 @@ struct kvm_x86_ops {
gfn_t offset, unsigned long mask);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
+
+ int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
};
struct kvm_arch_async_pf {
@@ -974,6 +974,39 @@ TRACE_EVENT(kvm_enter_smm,
__entry->smbase)
);
+/*
+ * Tracepoint for VT-d posted-interrupts.
+ */
+TRACE_EVENT(kvm_pi_irte_update,
+ TP_PROTO(unsigned int vcpu_id, unsigned int gsi,
+ unsigned int gvec, u64 pi_desc_addr, bool set),
+ TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, vcpu_id )
+ __field( unsigned int, gsi )
+ __field( unsigned int, gvec )
+ __field( u64, pi_desc_addr )
+ __field( bool, set )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->gsi = gsi;
+ __entry->gvec = gvec;
+ __entry->pi_desc_addr = pi_desc_addr;
+ __entry->set = set;
+ ),
+
+ TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, "
+ "gvec: 0x%x, pi_desc_addr: 0x%llx",
+ __entry->set ? "enabled and being updated" : "disabled",
+ __entry->vcpu_id,
+ __entry->gsi,
+ __entry->gvec,
+ __entry->pi_desc_addr)
+);
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
@@ -45,6 +45,7 @@
#include <asm/debugreg.h>
#include <asm/kexec.h>
#include <asm/apic.h>
+#include <asm/irq_remapping.h>
#include "trace.h"
#include "pmu.h"
@@ -605,6 +606,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
}
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+ return &(to_vmx(vcpu)->pi_desc);
+}
+
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
#define FIELD(number, name) [number] = VMCS12_OFFSET(name)
#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
@@ -10344,6 +10350,81 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}
+/*
+ * vmx_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set)
+{
+ struct kvm_kernel_irq_routing_entry *e;
+ struct kvm_irq_routing_table *irq_rt;
+ struct kvm_lapic_irq irq;
+ struct kvm_vcpu *vcpu;
+ struct vcpu_data vcpu_info;
+ int idx, ret = -EINVAL;
+
+ if (!kvm_arch_has_assigned_device(kvm) ||
+ !irq_remapping_cap(IRQ_POSTING_CAP))
+ return 0;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+ BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+ if (e->type != KVM_IRQ_ROUTING_MSI)
+ continue;
+ /*
+ * VT-d PI cannot support posting multicast/broadcast
+ * interrupts to a vCPU, we still use interrupt remapping
+ * for these kind of interrupts.
+ *
+ * For lowest-priority interrupts, we only support
+ * those with single CPU as the destination, e.g. user
+ * configures the interrupts via /proc/irq or uses
+ * irqbalance to make the interrupts single-CPU.
+ *
+ * We will support full lowest-priority interrupt later.
+ */
+
+ kvm_set_msi_irq(e, &irq);
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
+ continue;
+
+ vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
+ vcpu_info.vector = irq.vector;
+
+ trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi,
+ vcpu_info.vector, vcpu_info.pi_desc_addr, set);
+
+ if (set)
+ ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
+ else {
+ /* suppress notification event before unposting */
+ pi_set_sn(vcpu_to_pi_desc(vcpu));
+ ret = irq_set_vcpu_affinity(host_irq, NULL);
+ pi_clear_sn(vcpu_to_pi_desc(vcpu));
+ }
+
+ if (ret < 0) {
+ printk(KERN_INFO "%s: failed to update PI IRTE\n",
+ __func__);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return ret;
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -10461,6 +10542,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
.pmu_ops = &intel_pmu_ops,
+
+ .update_pi_irte = vmx_update_pi_irte,
};
static int __init vmx_init(void)
@@ -63,6 +63,7 @@
#include <asm/fpu/internal.h> /* Ugh! */
#include <asm/pvclock.h>
#include <asm/div64.h>
+#include <asm/irq_remapping.h>
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
@@ -8263,3 +8264,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
This patch adds the routine to update IRTE for posted-interrupts when guest changes the interrupt configuration. Signed-off-by: Feng Wu <feng.wu@intel.com> --- v9: - Check !kvm_arch_has_assigned_device(kvm) first then !irq_remapping_cap(IRQ_POSTING_CAP) v8: - Move 'kvm_arch_update_pi_irte' to vmx.c as a callback - Only update the PI irte when VM has assigned devices - Add a trace point for VT-d posted-interrupts when we update or disable it for a specific irq. arch/x86/include/asm/kvm_host.h | 3 ++ arch/x86/kvm/trace.h | 33 ++++++++++++++++ arch/x86/kvm/vmx.c | 83 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 2 + 4 files changed, 121 insertions(+)