Message ID | 20231214024727.3503870-2-vineeth@bitbyteword.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Dynamic vcpu priority management in kvm | expand |
"Vineeth Pillai (Google)" <vineeth@bitbyteword.org> writes: > Implement a kvm MSR that guest uses to provide the GPA of shared memory > for communicating the scheduling information between host and guest. > > wrmsr(0) disables the feature. wrmsr(valid_gpa) enables the feature and > uses the gpa for further communication. > > Also add a new cpuid feature flag for the host to advertise the feature > to the guest. > > Co-developed-by: Joel Fernandes (Google) <joel@joelfernandes.org> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> > Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org> > --- > arch/x86/include/asm/kvm_host.h | 25 ++++++++++++ > arch/x86/include/uapi/asm/kvm_para.h | 24 +++++++++++ > arch/x86/kvm/Kconfig | 12 ++++++ > arch/x86/kvm/cpuid.c | 2 + > arch/x86/kvm/x86.c | 61 ++++++++++++++++++++++++++++ > include/linux/kvm_host.h | 5 +++ > 6 files changed, 129 insertions(+) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index f72b30d2238a..f89ba1f07d88 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -987,6 +987,18 @@ struct kvm_vcpu_arch { > /* Protected Guests */ > bool guest_state_protected; > > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > + /* > + * MSR to setup a shared memory for scheduling > + * information sharing between host and guest. > + */ > + struct { > + enum kvm_vcpu_boost_state boost_status; > + u64 msr_val; > + struct gfn_to_hva_cache data; > + } pv_sched; > +#endif > + > /* > * Set when PDPTS were loaded directly by the userspace without > * reading the guest memory > @@ -2217,4 +2229,17 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); > */ > #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) > > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > +static inline bool kvm_arch_vcpu_pv_sched_enabled(struct kvm_vcpu_arch *arch) > +{ > + return arch->pv_sched.msr_val; > +} > + > +static inline void kvm_arch_vcpu_set_boost_status(struct kvm_vcpu_arch *arch, > + enum kvm_vcpu_boost_state boost_status) > +{ > + arch->pv_sched.boost_status = boost_status; > +} > +#endif > + > #endif /* _ASM_X86_KVM_HOST_H */ > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h > index 6e64b27b2c1e..6b1dea07a563 100644 > --- a/arch/x86/include/uapi/asm/kvm_para.h > +++ b/arch/x86/include/uapi/asm/kvm_para.h > @@ -36,6 +36,7 @@ > #define KVM_FEATURE_MSI_EXT_DEST_ID 15 > #define KVM_FEATURE_HC_MAP_GPA_RANGE 16 > #define KVM_FEATURE_MIGRATION_CONTROL 17 > +#define KVM_FEATURE_PV_SCHED 18 > > #define KVM_HINTS_REALTIME 0 > > @@ -58,6 +59,7 @@ > #define MSR_KVM_ASYNC_PF_INT 0x4b564d06 > #define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 > #define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 > +#define MSR_KVM_PV_SCHED 0x4b564da0 > > struct kvm_steal_time { > __u64 steal; > @@ -150,4 +152,26 @@ struct kvm_vcpu_pv_apf_data { > #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK > #define KVM_PV_EOI_DISABLED 0x0 > > +/* > + * VCPU boost state shared between the host and guest. > + */ > +enum kvm_vcpu_boost_state { > + /* Priority boosting feature disabled in host */ > + VCPU_BOOST_DISABLED = 0, > + /* > + * vcpu is not explicitly boosted by the host. > + * (Default priority when the guest started) > + */ > + VCPU_BOOST_NORMAL, > + /* vcpu is boosted by the host */ > + VCPU_BOOST_BOOSTED > +}; > + > +/* > + * Structure passed in via MSR_KVM_PV_SCHED > + */ > +struct pv_sched_data { > + __u64 boost_status; > +}; > + > #endif /* _UAPI_ASM_X86_KVM_PARA_H */ > diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig > index 89ca7f4c1464..dbcba73fb508 100644 > --- a/arch/x86/kvm/Kconfig > +++ b/arch/x86/kvm/Kconfig > @@ -141,4 +141,16 @@ config KVM_XEN > config KVM_EXTERNAL_WRITE_TRACKING > bool > > +config PARAVIRT_SCHED_KVM > + bool "Enable paravirt scheduling capability for kvm" > + depends on KVM > + help > + Paravirtualized scheduling facilitates the exchange of scheduling > + related information between the host and guest through shared memory, > + enhancing the efficiency of vCPU thread scheduling by the hypervisor. > + An illustrative use case involves dynamically boosting the priority of > + a vCPU thread when the guest is executing a latency-sensitive workload > + on that specific vCPU. > + This config enables paravirt scheduling in the kvm hypervisor. > + > endif # VIRTUALIZATION > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index 7bdc66abfc92..960ef6e869f2 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -1113,6 +1113,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) > (1 << KVM_FEATURE_POLL_CONTROL) | > (1 << KVM_FEATURE_PV_SCHED_YIELD) | > (1 << KVM_FEATURE_ASYNC_PF_INT); > + if (IS_ENABLED(CONFIG_PARAVIRT_SCHED_KVM)) > + entry->eax |= (1 << KVM_FEATURE_PV_SCHED); > > if (sched_info_on()) > entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 7bcf1a76a6ab..0f475b50ac83 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -3879,6 +3879,33 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > return 1; > break; > > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > + case MSR_KVM_PV_SCHED: > + if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED)) > + return 1; > + > + if (!(data & KVM_MSR_ENABLED)) > + break; > + > + if (!(data & ~KVM_MSR_ENABLED)) { > + /* > + * Disable the feature > + */ > + vcpu->arch.pv_sched.msr_val = 0; > + kvm_set_vcpu_boosted(vcpu, false); > + } if (!kvm_gfn_to_hva_cache_init(vcpu->kvm, > + &vcpu->arch.pv_sched.data, data & ~KVM_MSR_ENABLED, > + sizeof(struct pv_sched_data))) { > + vcpu->arch.pv_sched.msr_val = data; > + kvm_set_vcpu_boosted(vcpu, false); > + } else { > + pr_warn("MSR_KVM_PV_SCHED: kvm:%p, vcpu:%p, " > + "msr value: %llx, kvm_gfn_to_hva_cache_init failed!\n", > + vcpu->kvm, vcpu, data & ~KVM_MSR_ENABLED); As this is triggerable by the guest please drop this print (which is not even ratelimited!). I think it would be better to just 'return 1;' in case of kvm_gfn_to_hva_cache_init() failure but maybe you also need to account for 'msr_info->host_initiated' to not fail setting this MSR from the host upon migration. > + } > + break; > +#endif > + > case MSR_KVM_POLL_CONTROL: > if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) > return 1; > @@ -4239,6 +4266,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > msr_info->data = vcpu->arch.pv_eoi.msr_val; > break; > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > + case MSR_KVM_PV_SCHED: > + msr_info->data = vcpu->arch.pv_sched.msr_val; > + break; > +#endif > case MSR_KVM_POLL_CONTROL: > if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) > return 1; > @@ -9820,6 +9852,29 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) > return kvm_skip_emulated_instruction(vcpu); > } > > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > +static void record_vcpu_boost_status(struct kvm_vcpu *vcpu) > +{ > + u64 val = vcpu->arch.pv_sched.boost_status; > + > + if (!kvm_arch_vcpu_pv_sched_enabled(&vcpu->arch)) > + return; > + > + pagefault_disable(); > + kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.pv_sched.data, > + &val, offsetof(struct pv_sched_data, boost_status), sizeof(u64)); > + pagefault_enable(); > +} > + > +void kvm_set_vcpu_boosted(struct kvm_vcpu *vcpu, bool boosted) > +{ > + kvm_arch_vcpu_set_boost_status(&vcpu->arch, > + boosted ? VCPU_BOOST_BOOSTED : VCPU_BOOST_NORMAL); > + > + kvm_make_request(KVM_REQ_VCPU_BOOST_UPDATE, vcpu); > +} > +#endif > + > int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) > { > unsigned long nr, a0, a1, a2, a3, ret; > @@ -10593,6 +10648,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > } > if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) > record_steal_time(vcpu); > + > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > + if (kvm_check_request(KVM_REQ_VCPU_BOOST_UPDATE, vcpu)) > + record_vcpu_boost_status(vcpu); > +#endif > + > #ifdef CONFIG_KVM_SMM > if (kvm_check_request(KVM_REQ_SMI, vcpu)) > process_smi(vcpu); > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 9d3ac7720da9..a74aeea55347 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -167,6 +167,7 @@ static inline bool is_error_page(struct page *page) > #define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) > #define KVM_REQ_UNBLOCK 2 > #define KVM_REQ_DIRTY_RING_SOFT_FULL 3 > +#define KVM_REQ_VCPU_BOOST_UPDATE 6 > #define KVM_REQUEST_ARCH_BASE 8 > > /* > @@ -2287,4 +2288,8 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr) > /* Max number of entries allowed for each kvm dirty ring */ > #define KVM_DIRTY_RING_MAX_ENTRIES 65536 > > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > +void kvm_set_vcpu_boosted(struct kvm_vcpu *vcpu, bool boosted); > +#endif > + > #endif
On Thu, Dec 14, 2023 at 5:53 AM Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > > "Vineeth Pillai (Google)" <vineeth@bitbyteword.org> writes: > > > Implement a kvm MSR that guest uses to provide the GPA of shared memory > > for communicating the scheduling information between host and guest. > > > > wrmsr(0) disables the feature. wrmsr(valid_gpa) enables the feature and > > uses the gpa for further communication. > > > > Also add a new cpuid feature flag for the host to advertise the feature > > to the guest. > > > > Co-developed-by: Joel Fernandes (Google) <joel@joelfernandes.org> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> > > Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org> > > --- > > arch/x86/include/asm/kvm_host.h | 25 ++++++++++++ > > arch/x86/include/uapi/asm/kvm_para.h | 24 +++++++++++ > > arch/x86/kvm/Kconfig | 12 ++++++ > > arch/x86/kvm/cpuid.c | 2 + > > arch/x86/kvm/x86.c | 61 ++++++++++++++++++++++++++++ > > include/linux/kvm_host.h | 5 +++ > > 6 files changed, 129 insertions(+) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index f72b30d2238a..f89ba1f07d88 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -987,6 +987,18 @@ struct kvm_vcpu_arch { > > /* Protected Guests */ > > bool guest_state_protected; > > > > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > > + /* > > + * MSR to setup a shared memory for scheduling > > + * information sharing between host and guest. > > + */ > > + struct { > > + enum kvm_vcpu_boost_state boost_status; > > + u64 msr_val; > > + struct gfn_to_hva_cache data; > > + } pv_sched; > > +#endif > > + > > /* > > * Set when PDPTS were loaded directly by the userspace without > > * reading the guest memory > > @@ -2217,4 +2229,17 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); > > */ > > #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) > > > > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > > +static inline bool kvm_arch_vcpu_pv_sched_enabled(struct kvm_vcpu_arch *arch) > > +{ > > + return arch->pv_sched.msr_val; > > +} > > + > > +static inline void kvm_arch_vcpu_set_boost_status(struct kvm_vcpu_arch *arch, > > + enum kvm_vcpu_boost_state boost_status) > > +{ > > + arch->pv_sched.boost_status = boost_status; > > +} > > +#endif > > + > > #endif /* _ASM_X86_KVM_HOST_H */ > > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h > > index 6e64b27b2c1e..6b1dea07a563 100644 > > --- a/arch/x86/include/uapi/asm/kvm_para.h > > +++ b/arch/x86/include/uapi/asm/kvm_para.h > > @@ -36,6 +36,7 @@ > > #define KVM_FEATURE_MSI_EXT_DEST_ID 15 > > #define KVM_FEATURE_HC_MAP_GPA_RANGE 16 > > #define KVM_FEATURE_MIGRATION_CONTROL 17 > > +#define KVM_FEATURE_PV_SCHED 18 > > > > #define KVM_HINTS_REALTIME 0 > > > > @@ -58,6 +59,7 @@ > > #define MSR_KVM_ASYNC_PF_INT 0x4b564d06 > > #define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 > > #define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 > > +#define MSR_KVM_PV_SCHED 0x4b564da0 > > > > struct kvm_steal_time { > > __u64 steal; > > @@ -150,4 +152,26 @@ struct kvm_vcpu_pv_apf_data { > > #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK > > #define KVM_PV_EOI_DISABLED 0x0 > > > > +/* > > + * VCPU boost state shared between the host and guest. > > + */ > > +enum kvm_vcpu_boost_state { > > + /* Priority boosting feature disabled in host */ > > + VCPU_BOOST_DISABLED = 0, > > + /* > > + * vcpu is not explicitly boosted by the host. > > + * (Default priority when the guest started) > > + */ > > + VCPU_BOOST_NORMAL, > > + /* vcpu is boosted by the host */ > > + VCPU_BOOST_BOOSTED > > +}; > > + > > +/* > > + * Structure passed in via MSR_KVM_PV_SCHED > > + */ > > +struct pv_sched_data { > > + __u64 boost_status; > > +}; > > + > > #endif /* _UAPI_ASM_X86_KVM_PARA_H */ > > diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig > > index 89ca7f4c1464..dbcba73fb508 100644 > > --- a/arch/x86/kvm/Kconfig > > +++ b/arch/x86/kvm/Kconfig > > @@ -141,4 +141,16 @@ config KVM_XEN > > config KVM_EXTERNAL_WRITE_TRACKING > > bool > > > > +config PARAVIRT_SCHED_KVM > > + bool "Enable paravirt scheduling capability for kvm" > > + depends on KVM > > + help > > + Paravirtualized scheduling facilitates the exchange of scheduling > > + related information between the host and guest through shared memory, > > + enhancing the efficiency of vCPU thread scheduling by the hypervisor. > > + An illustrative use case involves dynamically boosting the priority of > > + a vCPU thread when the guest is executing a latency-sensitive workload > > + on that specific vCPU. > > + This config enables paravirt scheduling in the kvm hypervisor. > > + > > endif # VIRTUALIZATION > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > > index 7bdc66abfc92..960ef6e869f2 100644 > > --- a/arch/x86/kvm/cpuid.c > > +++ b/arch/x86/kvm/cpuid.c > > @@ -1113,6 +1113,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) > > (1 << KVM_FEATURE_POLL_CONTROL) | > > (1 << KVM_FEATURE_PV_SCHED_YIELD) | > > (1 << KVM_FEATURE_ASYNC_PF_INT); > > + if (IS_ENABLED(CONFIG_PARAVIRT_SCHED_KVM)) > > + entry->eax |= (1 << KVM_FEATURE_PV_SCHED); > > > > if (sched_info_on()) > > entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 7bcf1a76a6ab..0f475b50ac83 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -3879,6 +3879,33 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > return 1; > > break; > > > > +#ifdef CONFIG_PARAVIRT_SCHED_KVM > > + case MSR_KVM_PV_SCHED: > > + if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED)) > > + return 1; > > + > > + if (!(data & KVM_MSR_ENABLED)) > > + break; > > + > > + if (!(data & ~KVM_MSR_ENABLED)) { > > + /* > > + * Disable the feature > > + */ > > + vcpu->arch.pv_sched.msr_val = 0; > > + kvm_set_vcpu_boosted(vcpu, false); > > + } if (!kvm_gfn_to_hva_cache_init(vcpu->kvm, > > + &vcpu->arch.pv_sched.data, data & ~KVM_MSR_ENABLED, > > + sizeof(struct pv_sched_data))) { > > + vcpu->arch.pv_sched.msr_val = data; > > + kvm_set_vcpu_boosted(vcpu, false); > > + } else { > > + pr_warn("MSR_KVM_PV_SCHED: kvm:%p, vcpu:%p, " > > + "msr value: %llx, kvm_gfn_to_hva_cache_init failed!\n", > > + vcpu->kvm, vcpu, data & ~KVM_MSR_ENABLED); > > As this is triggerable by the guest please drop this print (which is not > even ratelimited!). I think it would be better to just 'return 1;' in case > of kvm_gfn_to_hva_cache_init() failure but maybe you also need to > account for 'msr_info->host_initiated' to not fail setting this MSR from > the host upon migration. > Makes sense, shall remove the pr_warn. I hadn't thought about migration, thanks for bringing this up. Will make modifications to account for migration as well. Thanks, Vineeth
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f72b30d2238a..f89ba1f07d88 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -987,6 +987,18 @@ struct kvm_vcpu_arch { /* Protected Guests */ bool guest_state_protected; +#ifdef CONFIG_PARAVIRT_SCHED_KVM + /* + * MSR to setup a shared memory for scheduling + * information sharing between host and guest. + */ + struct { + enum kvm_vcpu_boost_state boost_status; + u64 msr_val; + struct gfn_to_hva_cache data; + } pv_sched; +#endif + /* * Set when PDPTS were loaded directly by the userspace without * reading the guest memory @@ -2217,4 +2229,17 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); */ #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) +#ifdef CONFIG_PARAVIRT_SCHED_KVM +static inline bool kvm_arch_vcpu_pv_sched_enabled(struct kvm_vcpu_arch *arch) +{ + return arch->pv_sched.msr_val; +} + +static inline void kvm_arch_vcpu_set_boost_status(struct kvm_vcpu_arch *arch, + enum kvm_vcpu_boost_state boost_status) +{ + arch->pv_sched.boost_status = boost_status; +} +#endif + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 6e64b27b2c1e..6b1dea07a563 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -36,6 +36,7 @@ #define KVM_FEATURE_MSI_EXT_DEST_ID 15 #define KVM_FEATURE_HC_MAP_GPA_RANGE 16 #define KVM_FEATURE_MIGRATION_CONTROL 17 +#define KVM_FEATURE_PV_SCHED 18 #define KVM_HINTS_REALTIME 0 @@ -58,6 +59,7 @@ #define MSR_KVM_ASYNC_PF_INT 0x4b564d06 #define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 #define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 +#define MSR_KVM_PV_SCHED 0x4b564da0 struct kvm_steal_time { __u64 steal; @@ -150,4 +152,26 @@ struct kvm_vcpu_pv_apf_data { #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK #define KVM_PV_EOI_DISABLED 0x0 +/* + * VCPU boost state shared between the host and guest. + */ +enum kvm_vcpu_boost_state { + /* Priority boosting feature disabled in host */ + VCPU_BOOST_DISABLED = 0, + /* + * vcpu is not explicitly boosted by the host. + * (Default priority when the guest started) + */ + VCPU_BOOST_NORMAL, + /* vcpu is boosted by the host */ + VCPU_BOOST_BOOSTED +}; + +/* + * Structure passed in via MSR_KVM_PV_SCHED + */ +struct pv_sched_data { + __u64 boost_status; +}; + #endif /* _UAPI_ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 89ca7f4c1464..dbcba73fb508 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -141,4 +141,16 @@ config KVM_XEN config KVM_EXTERNAL_WRITE_TRACKING bool +config PARAVIRT_SCHED_KVM + bool "Enable paravirt scheduling capability for kvm" + depends on KVM + help + Paravirtualized scheduling facilitates the exchange of scheduling + related information between the host and guest through shared memory, + enhancing the efficiency of vCPU thread scheduling by the hypervisor. + An illustrative use case involves dynamically boosting the priority of + a vCPU thread when the guest is executing a latency-sensitive workload + on that specific vCPU. + This config enables paravirt scheduling in the kvm hypervisor. + endif # VIRTUALIZATION diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7bdc66abfc92..960ef6e869f2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1113,6 +1113,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) (1 << KVM_FEATURE_POLL_CONTROL) | (1 << KVM_FEATURE_PV_SCHED_YIELD) | (1 << KVM_FEATURE_ASYNC_PF_INT); + if (IS_ENABLED(CONFIG_PARAVIRT_SCHED_KVM)) + entry->eax |= (1 << KVM_FEATURE_PV_SCHED); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7bcf1a76a6ab..0f475b50ac83 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3879,6 +3879,33 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; break; +#ifdef CONFIG_PARAVIRT_SCHED_KVM + case MSR_KVM_PV_SCHED: + if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED)) + return 1; + + if (!(data & KVM_MSR_ENABLED)) + break; + + if (!(data & ~KVM_MSR_ENABLED)) { + /* + * Disable the feature + */ + vcpu->arch.pv_sched.msr_val = 0; + kvm_set_vcpu_boosted(vcpu, false); + } if (!kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.pv_sched.data, data & ~KVM_MSR_ENABLED, + sizeof(struct pv_sched_data))) { + vcpu->arch.pv_sched.msr_val = data; + kvm_set_vcpu_boosted(vcpu, false); + } else { + pr_warn("MSR_KVM_PV_SCHED: kvm:%p, vcpu:%p, " + "msr value: %llx, kvm_gfn_to_hva_cache_init failed!\n", + vcpu->kvm, vcpu, data & ~KVM_MSR_ENABLED); + } + break; +#endif + case MSR_KVM_POLL_CONTROL: if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) return 1; @@ -4239,6 +4266,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.pv_eoi.msr_val; break; +#ifdef CONFIG_PARAVIRT_SCHED_KVM + case MSR_KVM_PV_SCHED: + msr_info->data = vcpu->arch.pv_sched.msr_val; + break; +#endif case MSR_KVM_POLL_CONTROL: if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) return 1; @@ -9820,6 +9852,29 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } +#ifdef CONFIG_PARAVIRT_SCHED_KVM +static void record_vcpu_boost_status(struct kvm_vcpu *vcpu) +{ + u64 val = vcpu->arch.pv_sched.boost_status; + + if (!kvm_arch_vcpu_pv_sched_enabled(&vcpu->arch)) + return; + + pagefault_disable(); + kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.pv_sched.data, + &val, offsetof(struct pv_sched_data, boost_status), sizeof(u64)); + pagefault_enable(); +} + +void kvm_set_vcpu_boosted(struct kvm_vcpu *vcpu, bool boosted) +{ + kvm_arch_vcpu_set_boost_status(&vcpu->arch, + boosted ? VCPU_BOOST_BOOSTED : VCPU_BOOST_NORMAL); + + kvm_make_request(KVM_REQ_VCPU_BOOST_UPDATE, vcpu); +} +#endif + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -10593,6 +10648,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) record_steal_time(vcpu); + +#ifdef CONFIG_PARAVIRT_SCHED_KVM + if (kvm_check_request(KVM_REQ_VCPU_BOOST_UPDATE, vcpu)) + record_vcpu_boost_status(vcpu); +#endif + #ifdef CONFIG_KVM_SMM if (kvm_check_request(KVM_REQ_SMI, vcpu)) process_smi(vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d3ac7720da9..a74aeea55347 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -167,6 +167,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_DIRTY_RING_SOFT_FULL 3 +#define KVM_REQ_VCPU_BOOST_UPDATE 6 #define KVM_REQUEST_ARCH_BASE 8 /* @@ -2287,4 +2288,8 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr) /* Max number of entries allowed for each kvm dirty ring */ #define KVM_DIRTY_RING_MAX_ENTRIES 65536 +#ifdef CONFIG_PARAVIRT_SCHED_KVM +void kvm_set_vcpu_boosted(struct kvm_vcpu *vcpu, bool boosted); +#endif + #endif