@@ -128,6 +128,17 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
return apic->vcpu->vcpu_id;
}
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+ return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
+
+static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+ return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
+}
+
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
switch (map->mode) {
@@ -1436,29 +1447,6 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
}
}
-static void apic_timer_expired(struct kvm_lapic *apic)
-{
- struct kvm_vcpu *vcpu = apic->vcpu;
- struct swait_queue_head *q = &vcpu->wq;
- struct kvm_timer *ktimer = &apic->lapic_timer;
-
- if (atomic_read(&apic->lapic_timer.pending))
- return;
-
- atomic_inc(&apic->lapic_timer.pending);
- kvm_set_pending_timer(vcpu);
-
- /*
- * For x86, the atomic_inc() is serialized, thus
- * using swait_active() is safe.
- */
- if (swait_active(q))
- swake_up_one(q);
-
- if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
- ktimer->expired_tscdeadline = ktimer->tscdeadline;
-}
-
/*
* On APICv, this test will cause a busy wait
* during a higher-priority task.
@@ -1532,7 +1520,7 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
-void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u64 guest_tsc, tsc_deadline;
@@ -1540,9 +1528,6 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
if (apic->lapic_timer.expired_tscdeadline == 0)
return;
- if (!lapic_timer_int_injected(vcpu))
- return;
-
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
@@ -1554,8 +1539,59 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}
+
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+ if (!lapic_timer_int_injected(vcpu))
+ return;
+
+ __kvm_wait_lapic_expire(vcpu);
+}
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
+static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
+{
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+
+ kvm_apic_local_deliver(apic, APIC_LVTT);
+ if (apic_lvtt_tscdeadline(apic))
+ ktimer->tscdeadline = 0;
+ if (apic_lvtt_oneshot(apic)) {
+ ktimer->tscdeadline = 0;
+ ktimer->target_expiration = 0;
+ }
+}
+
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ struct swait_queue_head *q = &vcpu->wq;
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+
+ if (atomic_read(&apic->lapic_timer.pending))
+ return;
+
+ if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
+ ktimer->expired_tscdeadline = ktimer->tscdeadline;
+
+ if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
+ if (apic->lapic_timer.timer_advance_ns)
+ __kvm_wait_lapic_expire(vcpu);
+ kvm_apic_inject_pending_timer_irqs(apic);
+ return;
+ }
+
+ atomic_inc(&apic->lapic_timer.pending);
+ kvm_set_pending_timer(vcpu);
+
+ /*
+ * For x86, the atomic_inc() is serialized, thus
+ * using swait_active() is safe.
+ */
+ if (swait_active(q))
+ swake_up_one(q);
+}
+
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
struct kvm_timer *ktimer = &apic->lapic_timer;
@@ -2377,13 +2413,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
if (atomic_read(&apic->lapic_timer.pending) > 0) {
- kvm_apic_local_deliver(apic, APIC_LVTT);
- if (apic_lvtt_tscdeadline(apic))
- apic->lapic_timer.tscdeadline = 0;
- if (apic_lvtt_oneshot(apic)) {
- apic->lapic_timer.tscdeadline = 0;
- apic->lapic_timer.target_expiration = 0;
- }
+ kvm_apic_inject_pending_timer_irqs(apic);
atomic_set(&apic->lapic_timer.pending, 0);
}
}
@@ -2505,7 +2535,8 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
struct hrtimer *timer;
- if (!lapic_in_kernel(vcpu))
+ if (!lapic_in_kernel(vcpu) ||
+ kvm_can_post_timer_interrupt(vcpu))
return;
timer = &vcpu->arch.apic->lapic_timer.timer;
@@ -231,6 +231,7 @@ int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu);
static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
{
@@ -7036,7 +7036,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
- if (kvm_mwait_in_guest(vcpu->kvm))
+ if (kvm_mwait_in_guest(vcpu->kvm) ||
+ kvm_can_post_timer_interrupt(vcpu))
return -EOPNOTSUPP;
vmx = to_vmx(vcpu);
@@ -54,6 +54,7 @@
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
+#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>
#include <trace/events/kvm.h>
@@ -155,6 +156,9 @@
static bool __read_mostly force_emulation_prefix = false;
module_param(force_emulation_prefix, bool, S_IRUGO);
+int __read_mostly pi_inject_timer = -1;
+module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -7043,6 +7047,8 @@ int kvm_arch_init(void *opaque)
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_lapic_init();
+ if (pi_inject_timer == -1)
+ pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
#ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
@@ -296,6 +296,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
extern bool enable_vmware_backdoor;
+extern int pi_inject_timer;
+
extern struct static_key kvm_no_apic_vcpu;
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
@@ -19,6 +19,7 @@ enum hk_flags {
DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
extern int housekeeping_any_cpu(enum hk_flags flags);
extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags);
+extern bool housekeeping_enabled(enum hk_flags flags);
extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags);
extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags);
extern void __init housekeeping_init(void);
@@ -38,6 +39,7 @@ static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
static inline void housekeeping_affine(struct task_struct *t,
enum hk_flags flags) { }
static inline void housekeeping_init(void) { }
+static inline bool housekeeping_enabled(enum hk_flags flags) { }
#endif /* CONFIG_CPU_ISOLATION */
static inline bool housekeeping_cpu(int cpu, enum hk_flags flags)
@@ -13,6 +13,12 @@
static cpumask_var_t housekeeping_mask;
static unsigned int housekeeping_flags;
+bool housekeeping_enabled(enum hk_flags flags)
+{
+ return !!(housekeeping_flags & flags);
+}
+EXPORT_SYMBOL_GPL(housekeeping_enabled);
+
int housekeeping_any_cpu(enum hk_flags flags)
{
if (static_branch_unlikely(&housekeeping_overridden))