
[13/15] KVM: x86: Move ownership of passthrough MSR "shadow" to common x86

Message ID 20241127201929.4005605-14-aaronlewis@google.com (mailing list archive)
State New
Series Unify MSR intercepts in x86

Commit Message

Aaron Lewis Nov. 27, 2024, 8:19 p.m. UTC
Move KVM's "shadow" of the passthrough MSR intercept bitmaps out of vendor
code and into common x86, and handle the bulk of KVM_REQ_MSR_FILTER_CHANGED
processing in common code as well.  The SVM and VMX implementations are
functionally identical: both track KVM's desired intercept state for each
possible passthrough MSR, and both redo the intercepts for pass-through
MSRs when userspace changes its MSR filter.  With the bitmaps living in
kvm_vcpu_arch, common code can own kvm_msr_filter_changed() and invoke the
new disable_intercept_for_msr() hook, leaving msr_filter_changed as an
optional hook that VMX uses only to refresh the Intel PT MSRs.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: Aaron Lewis <aaronlewis@google.com>
Signed-off-by: Aaron Lewis <aaronlewis@google.com>
---
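Note for reviewers: below is a minimal, userspace-only sketch of the flow this
patch consolidates in common x86.  It is illustrative, not kernel code: it
models only the read bitmap, uses a made-up three-entry MSR list, and the
names possible_msrs, disable_read_intercept() and msr_filter_changed() are
stand-ins for kvm_x86_ops.possible_passthrough_msrs, the new
disable_intercept_for_msr() hook, and the common kvm_msr_filter_changed().
The unsigned long long stands in for the DECLARE_BITMAP() shadow added to
kvm_vcpu_arch.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for kvm_x86_ops.possible_passthrough_msrs (values illustrative). */
static const unsigned int possible_msrs[] = { 0x174, 0x175, 0x176 };
#define NR_MSRS (sizeof(possible_msrs) / sizeof(possible_msrs[0]))

/* Shadow of KVM's desired read intercepts; all MSRs start out intercepted. */
static unsigned long long shadow_read = ~0ULL;

/* Pretend userspace MSR filter: true means userspace wants reads intercepted. */
static bool userspace_filters(unsigned int msr)
{
	return msr == 0x175;
}

/*
 * Stand-in for the vendor disable_intercept_for_msr() hook: record KVM's
 * desire in the shadow bitmap, then honor the userspace filter when deciding
 * what the real (hardware) bitmap would get.
 */
static void disable_read_intercept(unsigned int msr)
{
	for (unsigned int i = 0; i < NR_MSRS; i++)
		if (possible_msrs[i] == msr)
			shadow_read &= ~(1ULL << i);

	printf("MSR %#x reads: %s\n", msr,
	       userspace_filters(msr) ? "intercepted (userspace filter)"
				      : "passed through");
}

/*
 * Stand-in for kvm_msr_filter_changed(): only MSRs whose shadow bit is clear
 * (i.e. KVM wants them passed through) need to be re-evaluated; everything
 * else stays intercepted regardless of what the filter says.
 */
static void msr_filter_changed(void)
{
	for (unsigned int i = 0; i < NR_MSRS; i++)
		if (!(shadow_read & (1ULL << i)))
			disable_read_intercept(possible_msrs[i]);
}

int main(void)
{
	disable_read_intercept(0x174);	/* KVM passes this MSR through */
	disable_read_intercept(0x175);	/* ...and this one, but the filter blocks it */
	msr_filter_changed();		/* userspace installed a new filter */
	return 0;
}

The property being relied on is the same as in the real code: the shadow
records what KVM wants irrespective of userspace filtering, so a filter
change only has to revisit the MSRs that KVM intends to pass through.
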
 arch/x86/include/asm/kvm-x86-ops.h |  3 ++-
 arch/x86/include/asm/kvm_host.h    | 11 +++++++++
 arch/x86/kvm/svm/svm.c             | 38 ++++--------------------------
 arch/x86/kvm/svm/svm.h             |  6 -----
 arch/x86/kvm/vmx/main.c            | 32 +------------------------
 arch/x86/kvm/vmx/vmx.c             | 22 ++++++++++-------
 arch/x86/kvm/vmx/vmx.h             |  7 ------
 arch/x86/kvm/x86.c                 | 37 ++++++++++++++++++++++++++++-
 8 files changed, 69 insertions(+), 87 deletions(-)

Patch

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 5aff7222e40fa..124c2e1e42026 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -131,7 +131,8 @@  KVM_X86_OP(check_emulate_instruction)
 KVM_X86_OP(apic_init_signal_blocked)
 KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
 KVM_X86_OP_OPTIONAL(migrate_timers)
-KVM_X86_OP(msr_filter_changed)
+KVM_X86_OP_OPTIONAL(msr_filter_changed)
+KVM_X86_OP(disable_intercept_for_msr)
 KVM_X86_OP(complete_emulated_msr)
 KVM_X86_OP(vcpu_deliver_sipi_vector)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7e9fee4d36cc2..808b5365e4bd2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -777,6 +777,16 @@  struct kvm_vcpu_arch {
 	u64 arch_capabilities;
 	u64 perf_capabilities;
 
+	/*
+	 * KVM's "shadow" of the MSR intercepts, i.e. bitmaps that track KVM's
+	 * desired behavior irrespective of userspace MSR filtering.
+	 */
+#define KVM_MAX_POSSIBLE_PASSTHROUGH_MSRS	64
+	struct {
+		DECLARE_BITMAP(read, KVM_MAX_POSSIBLE_PASSTHROUGH_MSRS);
+		DECLARE_BITMAP(write, KVM_MAX_POSSIBLE_PASSTHROUGH_MSRS);
+	} shadow_msr_intercept;
+
 	/*
 	 * Paging state of the vcpu
 	 *
@@ -1820,6 +1830,7 @@  struct kvm_x86_ops {
 
 	const u32 * const possible_passthrough_msrs;
 	const u32 nr_possible_passthrough_msrs;
+	void (*disable_intercept_for_msr)(struct kvm_vcpu *vcpu, u32 msr, int type);
 	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 23e6515bb7904..31ed6c68e8194 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -825,9 +825,9 @@  void svm_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	if (slot >= 0) {
 		/* Set the shadow bitmaps to the desired intercept states */
 		if (type & MSR_TYPE_R)
-			__clear_bit(slot, svm->shadow_msr_intercept.read);
+			__clear_bit(slot, vcpu->arch.shadow_msr_intercept.read);
 		if (type & MSR_TYPE_W)
-			__clear_bit(slot, svm->shadow_msr_intercept.write);
+			__clear_bit(slot, vcpu->arch.shadow_msr_intercept.write);
 	}
 
 	/*
@@ -864,9 +864,9 @@  void svm_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	if (slot >= 0) {
 		/* Set the shadow bitmaps to the desired intercept states */
 		if (type & MSR_TYPE_R)
-			__set_bit(slot, svm->shadow_msr_intercept.read);
+			__set_bit(slot, vcpu->arch.shadow_msr_intercept.read);
 		if (type & MSR_TYPE_W)
-			__set_bit(slot, svm->shadow_msr_intercept.write);
+			__set_bit(slot, vcpu->arch.shadow_msr_intercept.write);
 	}
 
 	if (type & MSR_TYPE_R)
@@ -939,30 +939,6 @@  void svm_vcpu_free_msrpm(unsigned long *msrpm)
 	__free_pages(virt_to_page(msrpm), get_order(MSRPM_SIZE));
 }
 
-static void svm_msr_filter_changed(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	u32 i;
-
-	/*
-	 * Redo intercept permissions for MSRs that KVM is passing through to
-	 * the guest.  Disabling interception will check the new MSR filter and
-	 * ensure that KVM enables interception if usersepace wants to filter
-	 * the MSR.  MSRs that KVM is already intercepting don't need to be
-	 * refreshed since KVM is going to intercept them regardless of what
-	 * userspace wants.
-	 */
-	for (i = 0; i < ARRAY_SIZE(direct_access_msrs); i++) {
-		u32 msr = direct_access_msrs[i];
-
-		if (!test_bit(i, svm->shadow_msr_intercept.read))
-			svm_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
-
-		if (!test_bit(i, svm->shadow_msr_intercept.write))
-			svm_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
-	}
-}
-
 static void add_msr_offset(u32 offset)
 {
 	int i;
@@ -1475,10 +1451,6 @@  static int svm_vcpu_create(struct kvm_vcpu *vcpu)
 	if (err)
 		goto error_free_vmsa_page;
 
-	/* All MSRs start out in the "intercepted" state. */
-	bitmap_fill(svm->shadow_msr_intercept.read, MAX_DIRECT_ACCESS_MSRS);
-	bitmap_fill(svm->shadow_msr_intercept.write, MAX_DIRECT_ACCESS_MSRS);
-
 	svm->msrpm = svm_vcpu_alloc_msrpm();
 	if (!svm->msrpm) {
 		err = -ENOMEM;
@@ -5155,7 +5127,7 @@  static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.possible_passthrough_msrs = direct_access_msrs,
 	.nr_possible_passthrough_msrs = ARRAY_SIZE(direct_access_msrs),
-	.msr_filter_changed = svm_msr_filter_changed,
+	.disable_intercept_for_msr = svm_disable_intercept_for_msr,
 	.complete_emulated_msr = svm_complete_emulated_msr,
 
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 2513990c5b6e6..a73da8ca73b49 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -313,12 +313,6 @@  struct vcpu_svm {
 	struct list_head ir_list;
 	spinlock_t ir_list_lock;
 
-	/* Save desired MSR intercept (read: pass-through) state */
-	struct {
-		DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS);
-		DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
-	} shadow_msr_intercept;
-
 	struct vcpu_sev_es_state sev_es;
 
 	bool guest_state_loaded;
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 6d52693b0fd6c..5279c82648fe6 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -32,37 +32,6 @@  static const u32 vmx_possible_passthrough_msrs[] = {
 	MSR_CORE_C7_RESIDENCY,
 };
 
-void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 i;
-
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
-	/*
-	 * Redo intercept permissions for MSRs that KVM is passing through to
-	 * the guest.  Disabling interception will check the new MSR filter and
-	 * ensure that KVM enables interception if usersepace wants to filter
-	 * the MSR.  MSRs that KVM is already intercepting don't need to be
-	 * refreshed since KVM is going to intercept them regardless of what
-	 * userspace wants.
-	 */
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-		u32 msr = vmx_possible_passthrough_msrs[i];
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.read))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.write))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
-	}
-
-	/* PT MSRs can be passed through iff PT is exposed to the guest. */
-	if (vmx_pt_mode_is_host_guest())
-		pt_update_intercept_for_msr(vcpu);
-}
-
 #define VMX_REQUIRED_APICV_INHIBITS				\
 	(BIT(APICV_INHIBIT_REASON_DISABLED) |			\
 	 BIT(APICV_INHIBIT_REASON_ABSENT) |			\
@@ -210,6 +179,7 @@  struct kvm_x86_ops vt_x86_ops __initdata = {
 
 	.possible_passthrough_msrs = vmx_possible_passthrough_msrs,
 	.nr_possible_passthrough_msrs = ARRAY_SIZE(vmx_possible_passthrough_msrs),
+	.disable_intercept_for_msr = vmx_disable_intercept_for_msr,
 	.msr_filter_changed = vmx_msr_filter_changed,
 	.complete_emulated_msr = kvm_complete_insn_gp,
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1c2c0c06f3d35..4cb3e9a8df2c0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3987,9 +3987,9 @@  void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	idx = vmx_get_passthrough_msr_slot(msr);
 	if (idx >= 0) {
 		if (type & MSR_TYPE_R)
-			__clear_bit(idx, vmx->shadow_msr_intercept.read);
+			__clear_bit(idx, vcpu->arch.shadow_msr_intercept.read);
 		if (type & MSR_TYPE_W)
-			__clear_bit(idx, vmx->shadow_msr_intercept.write);
+			__clear_bit(idx, vcpu->arch.shadow_msr_intercept.write);
 	}
 
 	if ((type & MSR_TYPE_R) &&
@@ -4029,9 +4029,9 @@  void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	idx = vmx_get_passthrough_msr_slot(msr);
 	if (idx >= 0) {
 		if (type & MSR_TYPE_R)
-			__set_bit(idx, vmx->shadow_msr_intercept.read);
+			__set_bit(idx, vcpu->arch.shadow_msr_intercept.read);
 		if (type & MSR_TYPE_W)
-			__set_bit(idx, vmx->shadow_msr_intercept.write);
+			__set_bit(idx, vcpu->arch.shadow_msr_intercept.write);
 	}
 
 	if (type & MSR_TYPE_R)
@@ -4117,6 +4117,16 @@  void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
 	}
 }
 
+void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
+	/* PT MSRs can be passed through iff PT is exposed to the guest. */
+	if (vmx_pt_mode_is_host_guest())
+		pt_update_intercept_for_msr(vcpu);
+}
+
 static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
 						     int pi_vec)
 {
@@ -7513,10 +7523,6 @@  int vmx_vcpu_create(struct kvm_vcpu *vcpu)
 		evmcs->hv_enlightenments_control.msr_bitmap = 1;
 	}
 
-	/* The MSR bitmap starts with all ones */
-	bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-	bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-
 	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
 #ifdef CONFIG_X86_64
 	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 43f573f6ca46a..c40e7c880764f 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -353,13 +353,6 @@  struct vcpu_vmx {
 	struct pt_desc pt_desc;
 	struct lbr_desc lbr_desc;
 
-	/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS	16
-	struct {
-		DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-		DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-	} shadow_msr_intercept;
-
 	/* ve_info must be page aligned. */
 	struct vmx_ve_information *ve_info;
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 20b6cce793af5..2082ae8dc5db1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1819,6 +1819,31 @@  int kvm_passthrough_msr_slot(u32 msr)
 }
 EXPORT_SYMBOL_GPL(kvm_passthrough_msr_slot);
 
+static void kvm_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+	u32 msr, i;
+
+	/*
+	 * Redo intercept permissions for MSRs that KVM is passing through to
+	 * the guest.  Disabling interception will check the new MSR filter and
+	 * ensure that KVM enables interception if userspace wants to filter
+	 * the MSR.  MSRs that KVM is already intercepting don't need to be
+	 * refreshed since KVM is going to intercept them regardless of what
+	 * userspace wants.
+	 */
+	for (i = 0; i < kvm_x86_ops.nr_possible_passthrough_msrs; i++) {
+		msr = kvm_x86_ops.possible_passthrough_msrs[i];
+
+		if (!test_bit(i, vcpu->arch.shadow_msr_intercept.read))
+			kvm_x86_call(disable_intercept_for_msr)(vcpu, msr, MSR_TYPE_R);
+
+		if (!test_bit(i, vcpu->arch.shadow_msr_intercept.write))
+			kvm_x86_call(disable_intercept_for_msr)(vcpu, msr, MSR_TYPE_W);
+	}
+
+	kvm_x86_call(msr_filter_changed)(vcpu);
+}
+
 /*
  * Write @data into the MSR specified by @index.  Select MSR specific fault
  * checks are bypassed if @host_initiated is %true.
@@ -9747,6 +9772,10 @@  int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
 		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, kvm_host.arch_capabilities);
 
+	r = -E2BIG;
+	if (ops->runtime_ops->nr_possible_passthrough_msrs >
+	    KVM_MAX_POSSIBLE_PASSTHROUGH_MSRS)
+		goto out_mmu_exit;
 	r = ops->hardware_setup();
 	if (r != 0)
 		goto out_mmu_exit;
@@ -10851,7 +10880,7 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
 			kvm_check_async_pf_completion(vcpu);
 		if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
-			kvm_x86_call(msr_filter_changed)(vcpu);
+			kvm_msr_filter_changed(vcpu);
 
 		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
 			kvm_x86_call(update_cpu_dirty_logging)(vcpu);
@@ -12305,6 +12334,12 @@  int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu->arch.hv_root_tdp = INVALID_PAGE;
 #endif
 
+	/* All MSRs start out in the "intercepted" state. */
+	bitmap_fill(vcpu->arch.shadow_msr_intercept.read,
+		    KVM_MAX_POSSIBLE_PASSTHROUGH_MSRS);
+	bitmap_fill(vcpu->arch.shadow_msr_intercept.write,
+		    KVM_MAX_POSSIBLE_PASSTHROUGH_MSRS);
+
 	r = kvm_x86_call(vcpu_create)(vcpu);
 	if (r)
 		goto free_guest_fpu;