@@ -573,7 +573,9 @@ int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
void snp_dump_hva_rmpentry(unsigned long address);
int psmash(u64 pfn);
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
+int rmp_make_private_mmio(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
+int rmp_make_shared_mmio(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
void kdump_sev_callback(void);
void snp_fixup_e820_tables(void);
@@ -704,6 +704,7 @@ enum sev_cmd_id {
KVM_SEV_SNP_LAUNCH_START = 100,
KVM_SEV_SNP_LAUNCH_UPDATE,
KVM_SEV_SNP_LAUNCH_FINISH,
+ KVM_SEV_SNP_MMIO_RMP_UPDATE,
KVM_SEV_NR_MAX,
};
@@ -874,6 +875,16 @@ struct kvm_sev_snp_launch_finish {
__u64 pad1[4];
};
+#define KVM_SEV_SNP_RMP_FLAG_PRIVATE (1 << 0)
+
+struct kvm_sev_snp_rmp_update {
+ __u32 flags; /* KVM_SEV_SNP_RMP_FLAG_xxxx */
+ __u32 pad0;
+	__u64 useraddr;	/* userspace address of the MMIO range (mmap'ed BAR) */
+	__u64 gpa;	/* guest physical address to assign the range to */
+	__u64 size;	/* size of the range in bytes */
+};
+
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
@@ -2541,6 +2541,8 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
return ret;
}
+static int snp_mmio_rmp_update(struct kvm *kvm, struct kvm_sev_cmd *argp);
+
int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -2646,6 +2648,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
case KVM_SEV_SNP_LAUNCH_FINISH:
r = snp_launch_finish(kvm, &sev_cmd);
break;
+ case KVM_SEV_SNP_MMIO_RMP_UPDATE:
+ r = snp_mmio_rmp_update(kvm, &sev_cmd);
+ break;
default:
r = -EINVAL;
goto out;
@@ -4115,6 +4120,136 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
return 1; /* resume guest */
}
+static int hva_to_pfn_remapped(struct vm_area_struct *vma,
+ unsigned long addr, bool write_fault,
+ bool *writable, kvm_pfn_t *p_pfn)
+{
+ struct follow_pfnmap_args args = { .vma = vma, .address = addr };
+ kvm_pfn_t pfn;
+ int r;
+
+ r = follow_pfnmap_start(&args);
+ if (r) {
+ /*
+ * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
+ * not call the fault handler, so do it here.
+ */
+ bool unlocked = false;
+
+ r = fixup_user_fault(current->mm, addr,
+ (write_fault ? FAULT_FLAG_WRITE : 0),
+ &unlocked);
+ if (unlocked)
+ return -EAGAIN;
+ if (r)
+ return r;
+
+ r = follow_pfnmap_start(&args);
+ if (r)
+ return r;
+ }
+
+ if (write_fault && !args.writable) {
+ pfn = KVM_PFN_ERR_RO_FAULT;
+ goto out;
+ }
+
+ if (writable)
+ *writable = args.writable;
+ pfn = args.pfn;
+out:
+ follow_pfnmap_end(&args);
+ *p_pfn = pfn;
+
+ return r;
+}
+
+static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
+{
+ if (unlikely(!(vma->vm_flags & VM_READ)))
+ return false;
+
+ if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
+ return false;
+
+ return true;
+}
+
+static inline int check_user_page_hwpoison(unsigned long addr)
+{
+ int rc, flags = FOLL_HWPOISON | FOLL_WRITE;
+
+ rc = get_user_pages(addr, 1, flags, NULL);
+ return rc == -EHWPOISON;
+}
+
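+/*
+ * Local copy of kvm_main.c's hva_to_pfn(), reduced to the VM_IO/VM_PFNMAP
+ * case: only MMIO mappings are expected by the caller below.
+ */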
+static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
+ bool *async, bool write_fault, bool *writable)
+{
+ struct vm_area_struct *vma;
+ kvm_pfn_t pfn;
+ int r;
+
+ mmap_read_lock(current->mm);
+retry:
+ vma = vma_lookup(current->mm, addr);
+
+	if (!vma)
+		pfn = KVM_PFN_ERR_FAULT;
+	else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
+		/* Here we only expect MMIO for validation */
+		r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn);
+ if (r == -EAGAIN)
+ goto retry;
+ if (r < 0)
+ pfn = KVM_PFN_ERR_FAULT;
+ } else {
+ if (async && vma_is_valid(vma, write_fault))
+ *async = true;
+ pfn = KVM_PFN_ERR_FAULT;
+ }
+
+ mmap_read_unlock(current->mm);
+ return pfn;
+}
+
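+/*
+ * Handler for KVM_SEV_SNP_MMIO_RMP_UPDATE: walk the MMIO range page by
+ * page and switch each page's RMP entry to guest-assigned (private) or
+ * back to shared. The guest validates the entries later via TIO GUEST
+ * REQUEST messages.
+ */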
+static int snp_mmio_rmp_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_snp_rmp_update params;
+ bool async = false, writable = false;
+	int ret = 0;	/* a zero-sized range is a no-op */
+
+ if (!sev_snp_guest(kvm))
+ return -ENOTTY;
+
+ if (!sev->snp_context)
+ return -EINVAL;
+
+	if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
+ return -EFAULT;
+
+ for (phys_addr_t off = 0; off < params.size; off += PAGE_SIZE) {
+		kvm_pfn_t pfn = hva_to_pfn(params.useraddr + off,
+					   false /* atomic */,
+					   false /* interruptible */,
+					   &async, false /* write_fault */,
+					   &writable);
+
+ if (is_error_pfn(pfn))
+ return -EINVAL;
+
+ if (params.flags & KVM_SEV_SNP_RMP_FLAG_PRIVATE)
+			ret = rmp_make_private_mmio(pfn, params.gpa + off, PG_LEVEL_4K,
+						    sev->asid, false /* immutable */);
+ else
+ ret = rmp_make_shared_mmio(pfn, PG_LEVEL_4K);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -978,7 +978,7 @@ static int adjust_direct_map(u64 pfn, int rmp_level)
* The optimal solution would be range locking to avoid locking disjoint
* regions unnecessarily but there's no support for that yet.
*/
-static int rmpupdate(u64 pfn, struct rmp_state *state)
+static int rmpupdate(u64 pfn, struct rmp_state *state, bool mmio)
{
unsigned long paddr = pfn << PAGE_SHIFT;
int ret, level;
@@ -988,7 +988,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state)
level = RMP_TO_PG_LEVEL(state->pagesize);
- if (adjust_direct_map(pfn, level))
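+	/* MMIO PFNs have no kernel direct mapping to adjust */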
+ if (!mmio && adjust_direct_map(pfn, level))
return -EFAULT;
do {
@@ -1022,10 +1022,25 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut
state.gpa = gpa;
state.pagesize = PG_LEVEL_TO_RMP(level);
- return rmpupdate(pfn, &state);
+ return rmpupdate(pfn, &state, false);
}
EXPORT_SYMBOL_GPL(rmp_make_private);
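+
+/*
+ * MMIO variant of rmp_make_private(): MMIO PFNs have no struct page and
+ * no kernel direct mapping for rmpupdate() to adjust.
+ */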
+int rmp_make_private_mmio(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable)
+{
+ struct rmp_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.assigned = 1;
+ state.asid = asid;
+ state.immutable = immutable;
+ state.gpa = gpa;
+ state.pagesize = PG_LEVEL_TO_RMP(level);
+
+ return rmpupdate(pfn, &state, true);
+}
+EXPORT_SYMBOL_GPL(rmp_make_private_mmio);
+
/* Transition a page to hypervisor-owned/shared state in the RMP table. */
int rmp_make_shared(u64 pfn, enum pg_level level)
{
@@ -1034,10 +1049,21 @@ int rmp_make_shared(u64 pfn, enum pg_level level)
memset(&state, 0, sizeof(state));
state.pagesize = PG_LEVEL_TO_RMP(level);
- return rmpupdate(pfn, &state);
+ return rmpupdate(pfn, &state, false);
}
EXPORT_SYMBOL_GPL(rmp_make_shared);
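+
+/* MMIO variant of rmp_make_shared(): skips the direct map adjustment. */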
+int rmp_make_shared_mmio(u64 pfn, enum pg_level level)
+{
+ struct rmp_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.pagesize = PG_LEVEL_TO_RMP(level);
+
+ return rmpupdate(pfn, &state, true);
+}
+EXPORT_SYMBOL_GPL(rmp_make_shared_mmio);
+
void snp_leak_pages(u64 pfn, unsigned int npages)
{
struct page *page = pfn_to_page(pfn);
The TDI bind operation moves the TDI into the "RUN" state, which means
that TEE resources must from now on be used as encrypted, or the device
will refuse to operate. This requires RMP setup for MMIO BARs, which is
done in two steps:

- RMPUPDATE on the host to assign the host's MMIO ranges to GPAs
  (like RAM);
- validate the RMP entry, which is done via a TIO GUEST REQUEST GHCB
  message (unlike RAM, for which the VM can simply call PVALIDATE);
  TDI bind must complete first to ensure the TDI is in the LOCKED state
  so that the location of MMIO is fixed.

The bind happens on the first TIO GUEST REQUEST from the guest. At this
point KVM does not know the host TDI BDFn, so it exits to QEMU, which
calls VFIO-IOMMUFD to bind the TDI.

Now RMPUPDATE needs to be done somewhere on the way back to the guest.
Possible places are:
a) the VFIO-IOMMUFD bind handler (does not know the GPAs);
b) QEMU (can mmap the MMIO and knows the GPAs);
c) the KVM handler which received the first TIO GUEST REQUEST (does not
   know the host MMIO ranges or the host BDFn).

Approach b) is taken. Add a KVM ioctl() to update the RMP table for a
given MMIO range; much of the code is copied from existing helpers.
The validation happens later, on explicit guest requests.

Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
---
 arch/x86/include/asm/sev.h      |   2 +
 arch/x86/include/uapi/asm/kvm.h |  11 ++
 arch/x86/kvm/svm/sev.c          | 135 ++++++++++++++++++++
 arch/x86/virt/svm/sev.c         |  34 ++++-
 4 files changed, 178 insertions(+), 4 deletions(-)
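
For reviewers, a minimal sketch (not part of this patch) of how a VMM
such as QEMU might drive the new command through KVM_MEMORY_ENCRYPT_OP;
the function name and the vm_fd/sev_fd/BAR variables are placeholders
assumed to come from the VMM's device model, and the uapi definitions
come from a kernel with this patch applied:

/* Illustrative only: assign a mmap'ed MMIO BAR to the guest in the RMP. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int snp_mmio_make_private(int vm_fd, int sev_fd, uint64_t bar_hva,
				 uint64_t bar_gpa, uint64_t bar_size)
{
	struct kvm_sev_snp_rmp_update params = {
		.flags = KVM_SEV_SNP_RMP_FLAG_PRIVATE,
		.useraddr = bar_hva,
		.gpa = bar_gpa,
		.size = bar_size,
	};
	struct kvm_sev_cmd cmd = {
		.id = KVM_SEV_SNP_MMIO_RMP_UPDATE,
		.data = (uintptr_t)&params,
		.sev_fd = sev_fd,
	};

	/* Dispatches to sev_mem_enc_ioctl() -> snp_mmio_rmp_update() */
	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}

Transitioning the range back to shared is the same call with
params.flags cleared.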