@@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq,
__entry->coalesced ? " (coalesced)" : "")
);
+TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
+ TP_PROTO(__u64 e),
+ TP_ARGS(e),
+
+ TP_STRUCT__entry(
+ __field( __u64, e )
+ ),
+
+ TP_fast_assign(
+ __entry->e = e;
+ ),
+
+ TP_printk("dst %x vec=%u (%s|%s|%s%s)%s",
+ (u8)(__entry->e >> 56), (u8)__entry->e,
+ __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+ (__entry->e & (1<<11)) ? "logical" : "physical",
+ (__entry->e & (1<<15)) ? "level" : "edge",
+ (__entry->e & (1<<16)) ? "|masked" : "")
+);
+
TRACE_EVENT(kvm_msi_set_irq,
TP_PROTO(__u64 address, __u64 data),
TP_ARGS(address, data),
@@ -405,6 +405,24 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
spin_unlock(&ioapic->lock);
}
+static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
+{
+ int i;
+ struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
+ eoi_inject.work);
+ spin_lock(&ioapic->lock);
+ for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+ union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+ if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
+ continue;
+
+ if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
+ ioapic_service(ioapic, i, false);
+ }
+ spin_unlock(&ioapic->lock);
+}
+
static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
struct kvm_ioapic *ioapic, int vector, int trigger_mode)
{
@@ -435,8 +453,33 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
- if (ioapic->irr & (1 << i))
- ioapic_service(ioapic, i, false);
+ if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
+ ++ioapic->irq_eoi[i];
+ if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
+ /*
+ * Real hardware does not deliver the irq so
+ * immediately during eoi broadcast, so we need
+ * to emulate this behavior. Otherwise, for
+ * guests who has not registered handler of a
+ * level irq, this irq would be injected
+ * immediately after guest enables interrupt
+ * (which happens usually at the end of the
+ * common interrupt routine). This would lead
+ * guest can't move forward and may miss the
+ * possibility to get proper irq handler
+ * registered. So we need to give some breath to
+ * guest. TODO: 1 is too long?
+ */
+ schedule_delayed_work(&ioapic->eoi_inject, 1);
+ ioapic->irq_eoi[i] = 0;
+ trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
+ }
+ else {
+ ioapic_service(ioapic, i, false);
+ }
+ else {
+ ioapic->irq_eoi[i] = 0;
+ }
}
}
@@ -565,12 +608,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
{
int i;
+ cancel_delayed_work_sync(&ioapic->eoi_inject);
for (i = 0; i < IOAPIC_NUM_PINS; i++)
ioapic->redirtbl[i].fields.mask = 1;
ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
ioapic->ioregsel = 0;
ioapic->irr = 0;
ioapic->id = 0;
+ memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
rtc_irq_eoi_tracking_reset(ioapic);
update_handled_vectors(ioapic);
}
@@ -589,6 +634,7 @@ int kvm_ioapic_init(struct kvm *kvm)
if (!ioapic)
return -ENOMEM;
spin_lock_init(&ioapic->lock);
+ INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
kvm->arch.vioapic = ioapic;
kvm_ioapic_reset(ioapic);
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -609,6 +655,7 @@ void kvm_ioapic_destroy(struct kvm *kvm)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+ cancel_delayed_work_sync(&ioapic->eoi_inject);
if (ioapic) {
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
kvm->arch.vioapic = NULL;
@@ -34,6 +34,10 @@ struct kvm_vcpu;
#define IOAPIC_INIT 0x5
#define IOAPIC_EXTINT 0x7
+/* max successive count of the same irq during ioapic eoi broadcast, if this limitation reached,
+ which can be considered as interrupt storm happened */
+#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
+
#ifdef CONFIG_X86
#define RTC_GSI 8
#else
@@ -59,6 +63,8 @@ struct kvm_ioapic {
spinlock_t lock;
DECLARE_BITMAP(handled_vectors, 256);
struct rtc_status rtc_status;
+ struct delayed_work eoi_inject;
+ u32 irq_eoi[IOAPIC_NUM_PINS];
};
#ifdef DEBUG