--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -37,6 +37,7 @@
#include <asm/kvm_vcpu_regs.h>
#include <asm/hyperv-tlfs.h>
#include <asm/reboot.h>
+#include <asm/asi.h>
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
@@ -1535,6 +1536,8 @@ struct kvm_arch {
*/
#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
struct kvm_mmu_memory_cache split_desc_cache;
+
+ struct asi *asi;
};
struct kvm_vm_stat {
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4186,6 +4186,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
guest_state_enter_irqoff();
amd_clear_divider();
+ asi_enter(vcpu->kvm->arch.asi);
if (sev_es_guest(vcpu->kvm))
__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
@@ -4193,6 +4194,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
else
__svm_vcpu_run(svm, spec_ctrl_intercepted);
+ asi_relax();
guest_state_exit_irqoff();
}
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -49,6 +49,7 @@
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
#include <asm/vmx.h>
+#include <asm/asi.h>
#include <trace/events/ipi.h>
@@ -7282,14 +7283,34 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
unsigned int flags)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long cr3;
guest_state_enter_irqoff();
+ asi_enter(vcpu->kvm->arch.asi);
+
+ /*
+ * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
+ * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
+ * it switches back to the current->mm, which can occur in KVM context
+ * when switching to a temporary mm to patch kernel code, e.g. if KVM
+ * toggles a static key while handling a VM-Exit.
+ * Also, this must be done after asi_enter(), since asi_enter()
+ * switches CR3 when it changes address spaces.
+ */
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
/*
* L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
* mitigation for MDS is done late in VMentry and is still
* executed in spite of L1D Flush. This is because an extra VERW
* should not matter much after the big hammer L1D Flush.
+ *
+ * This is done after asi_enter() only for performance reasons.
+ * RFC: This also needs to be integrated with ASI's tainting model.
*/
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
@@ -7310,6 +7331,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx->idt_vectoring_info = 0;
+ asi_relax();
+
vmx_enable_fb_clear(vmx);
if (unlikely(vmx->fail)) {
@@ -7338,7 +7361,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr3, cr4;
+ unsigned long cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -7381,19 +7404,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
vcpu->arch.regs_dirty = 0;
- /*
- * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
- * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
- * it switches back to the current->mm, which can occur in KVM context
- * when switching to a temporary mm to patch kernel code, e.g. if KVM
- * toggles a static key while handling a VM-Exit.
- */
- cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
- vmcs_writel(HOST_CR3, cr3);
- vmx->loaded_vmcs->host_state.cr3 = cr3;
- }
-
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -85,6 +85,7 @@
#include <asm/emulate_prefix.h>
#include <asm/sgx.h>
#include <clocksource/hyperv_timer.h>
+#include <asm/asi.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -9674,6 +9675,55 @@ static void kvm_x86_check_cpu_compat(void *ret)
*(int *)ret = kvm_x86_check_processor_compatibility();
}
+#ifdef CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION
+static inline int kvm_x86_init_asi_class(void)
+{
+ static struct asi_taint_policy policy = {
+ /*
+ * Prevent going to the guest with sensitive data potentially
+ * left in sidechannels by code running in the unrestricted
+ * address space, or another MM.
+ */
+ .protect_data = ASI_TAINT_KERNEL_DATA | ASI_TAINT_OTHER_MM_DATA,
+ /*
+ * Prevent going to the guest with branch predictor state
+ * influenced by other processes. Note this bit is about
+ * protecting the guest from other parts of the system, while
+ * .protect_data is about protecting other parts of the system
+ * from the guest.
+ */
+ .prevent_control = ASI_TAINT_OTHER_MM_CONTROL,
+ .set = ASI_TAINT_GUEST_DATA,
+ };
+
+ /*
+ * Inform ASI that the guest will gain control of the branch predictor,
+ * unless we're just unconditionally blasting it after VM Exit.
+ *
+ * RFC: This is a bit simplified - on some configurations we could avoid
+ * a duplicated RSB-fill if we had a separate taint specifically for the
+ * RSB.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT) ||
+ !IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ||
+ !cpu_feature_enabled(X86_FEATURE_RSB_VMEXIT))
+ policy.set |= ASI_TAINT_GUEST_CONTROL;
+
+ /*
+ * And the same for data left behind by code in the userspace domain
+ * (i.e. the VMM itself, plus kernel code serving its syscalls etc).
+ * This should eventually be configurable: users whose VMMs contain
+ * no secrets can disable it to avoid paying a mitigation cost on
+ * transition between their guest and userspace.
+ */
+ policy.protect_data |= ASI_TAINT_USER_DATA;
+
+ return asi_init_class(ASI_CLASS_KVM, &policy);
+}
+#else /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
+static inline int kvm_x86_init_asi_class(void) { return 0; }
+#endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
+
int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
{
u64 host_pat;
@@ -9737,6 +9787,10 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_caps.supported_vm_types = BIT(KVM_X86_DEFAULT_VM);
kvm_caps.supported_mce_cap = MCG_CTL_P | MCG_SER_P;
+ r = kvm_x86_init_asi_class();
+ if (r < 0)
+ goto out_mmu_exit;
+
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
@@ -9754,7 +9808,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
r = ops->hardware_setup();
if (r != 0)
- goto out_mmu_exit;
+ goto out_asi_uninit;
kvm_ops_update(ops);
@@ -9810,6 +9864,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
out_unwind_ops:
kvm_x86_ops.enable_virtualization_cpu = NULL;
kvm_x86_call(hardware_unsetup)();
+out_asi_uninit:
+ asi_uninit_class(ASI_CLASS_KVM);
out_mmu_exit:
kvm_mmu_vendor_module_exit();
out_free_percpu:
@@ -9841,6 +9897,7 @@ void kvm_x86_vendor_exit(void)
cancel_work_sync(&pvclock_gtod_work);
#endif
kvm_x86_call(hardware_unsetup)();
+ asi_uninit_class(ASI_CLASS_KVM);
kvm_mmu_vendor_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_emulator_cache);
@@ -11574,6 +11631,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
r = vcpu_run(vcpu);
+ /*
+ * At present ASI doesn't have the capability to transition directly
+ * from the restricted address space to the user address space. So we
+ * just return to the unrestricted address space in between.
+ */
+ asi_exit();
+
out:
kvm_put_guest_fpu(vcpu);
if (kvm_run->kvm_valid_regs)
@@ -12705,6 +12769,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_uninit_mmu;
+ ret = asi_init(kvm->mm, ASI_CLASS_KVM, &kvm->arch.asi);
+ if (ret)
+ goto out_uninit_mmu;
+
+ ret = static_call(kvm_x86_vm_init)(kvm);
+ if (ret)
+ goto out_asi_destroy;
+
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
atomic_set(&kvm->arch.noncoherent_dma_count, 0);
@@ -12742,6 +12814,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
+out_asi_destroy:
+ asi_destroy(kvm->arch.asi);
out_uninit_mmu:
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
@@ -12883,6 +12957,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_destroy_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
+ asi_destroy(kvm->arch.asi);
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
kvm_xen_destroy_vm(kvm);
An ASI restricted address space is added for KVM. This protects
userspace from attack by the guest, and the guest from attack by other
processes. It doesn't attempt to prevent the guest from attack by the
current process.

This change incorporates an extra asi_exit() at the end of vcpu_run().
We expect later iterations of ASI to drop that call as we gain the
ability to context switch within the ASI domain.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/svm/svm.c          |  2 ++
 arch/x86/kvm/vmx/vmx.c          | 38 ++++++++++++--------
 arch/x86/kvm/x86.c              | 77 ++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 105 insertions(+), 15 deletions(-)
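
For reviewers, a condensed sketch of the ASI call sequence this patch wires
into KVM is included below. It is purely illustrative: the call sites are
taken from the hunks above, but the asi_*() usage is paraphrased from those
call sites rather than quoted from <asm/asi.h>.

	/* Module init/exit (kvm_x86_vendor_init() / kvm_x86_vendor_exit()): */
	r = asi_init_class(ASI_CLASS_KVM, &policy);	/* register the KVM taint policy */
	...
	asi_uninit_class(ASI_CLASS_KVM);		/* on error paths and at module exit */

	/* Per VM (kvm_arch_init_vm() / kvm_arch_destroy_vm()): */
	ret = asi_init(kvm->mm, ASI_CLASS_KVM, &kvm->arch.asi);
	...
	asi_destroy(kvm->arch.asi);

	/* Around every VM entry (svm_vcpu_enter_exit() / vmx_vcpu_enter_exit()): */
	asi_enter(vcpu->kvm->arch.asi);		/* switch to the restricted address space */
	/* ... __svm_vcpu_run() / __vmx_vcpu_run() ... */
	asi_relax();				/* end of the ASI critical section */

	/* Once per KVM_RUN, after vcpu_run() (kvm_arch_vcpu_ioctl_run()): */
	asi_exit();				/* return to the unrestricted address space
						   before going back to userspace */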