@@ -104,6 +104,7 @@ enum psc_op {
(((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
#define GHCB_HV_FT_SNP BIT_ULL(0)
+#define GHCB_HV_FT_SNP_AP_CREATION (BIT_ULL(1) | GHCB_HV_FT_SNP)
/* SNP Page State Change NAE event */
#define VMGEXIT_PSC_MAX_ENTRY 253
@@ -66,6 +66,8 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
/* RMP page size */
#define RMP_PG_SIZE_4K 0
+#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern struct static_key_false sev_es_enable_key;
extern void __sev_es_ist_enter(struct pt_regs *regs);
@@ -130,6 +132,7 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
+void snp_set_wakeup_secondary_cpu(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
@@ -146,6 +149,7 @@ early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned i
static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { }
static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_wakeup_secondary_cpu(void) { }
#endif
#endif
@@ -109,6 +109,10 @@
#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0
#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
#define SVM_VMGEXIT_PSC 0x80000010
+#define SVM_VMGEXIT_AP_CREATION 0x80000013
+#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
+#define SVM_VMGEXIT_AP_CREATE 1
+#define SVM_VMGEXIT_AP_DESTROY 2
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
@@ -221,6 +225,7 @@
{ SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \
{ SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \
{ SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \
+ { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \
{ SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \
{ SVM_EXIT_ERR, "invalid_guest_state" }
@@ -18,6 +18,7 @@
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/cpumask.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
@@ -31,6 +32,7 @@
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>
+#include <asm/apic.h>
#define DR7_RESET_VALUE 0x400
@@ -87,6 +89,8 @@ struct ghcb_state {
static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
+static DEFINE_PER_CPU(struct sev_es_save_area *, snp_vmsa);
+
static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
unsigned long sp = regs->sp;
@@ -789,6 +793,231 @@ void snp_set_memory_private(unsigned long vaddr, unsigned int npages)
pvalidate_pages(vaddr, npages, 1);
}
+static int snp_set_vmsa(void *va, bool vmsa)
+{
+ u64 attrs;
+
+ /*
+ * The RMPADJUST instruction is used to set or clear the VMSA bit for
+ * a page. A change to the VMSA bit is only performed when running
+ * at VMPL0 and is ignored at other VMPL levels. If too low of a target
+ * VMPL level is specified, the instruction can succeed without changing
+ * the VMSA bit should the kernel not be in VMPL0. Using a target VMPL
+ * level of 1 will return a FAIL_PERMISSION error if the kernel is not
+ * at VMPL0, thus ensuring that the VMSA bit has been properly set when
+ * no error is returned.
+ */
+ attrs = 1;
+ if (vmsa)
+ attrs |= RMPADJUST_VMSA_PAGE_BIT;
+
+ return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+}
+
+#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
+#define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
+#define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
+
+#define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2)
+#define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3)
+
+static void *snp_safe_alloc_page(void)
+{
+ unsigned long pfn;
+ struct page *p;
+
+ /*
+ * Allocate an SNP safe page to workaround the SNP erratum where
+ * the CPU will incorrectly signal an RMP violation #PF if a
+ * hugepage (2mb or 1gb) collides with the RMP entry of VMSA page.
+ * The recommeded workaround is to not use the large page.
+ *
+ * Allocate one extra page, use a page which is not 2mb aligned
+ * and free the other.
+ */
+ p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+ if (!p)
+ return NULL;
+
+ split_page(p, 1);
+
+ pfn = page_to_pfn(p);
+ if (IS_ALIGNED(__pfn_to_phys(pfn), PMD_SIZE)) {
+ pfn++;
+ __free_page(p);
+ } else {
+ __free_page(pfn_to_page(pfn + 1));
+ }
+
+ return page_address(pfn_to_page(pfn));
+}
+
+static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip)
+{
+ struct sev_es_save_area *cur_vmsa, *vmsa;
+ struct ghcb_state state;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ int cpu, err, ret;
+ u8 sipi_vector;
+ u64 cr4;
+
+ if ((sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION) != GHCB_HV_FT_SNP_AP_CREATION)
+ return -EOPNOTSUPP;
+
+ /*
+ * Verify the desired start IP against the known trampoline start IP
+ * to catch any future new trampolines that may be introduced that
+ * would require a new protected guest entry point.
+ */
+ if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
+ "Unsupported SEV-SNP start_ip: %lx\n", start_ip))
+ return -EINVAL;
+
+ /* Override start_ip with known protected guest start IP */
+ start_ip = real_mode_header->sev_es_trampoline_start;
+
+ /* Find the logical CPU for the APIC ID */
+ for_each_present_cpu(cpu) {
+ if (arch_match_cpu_phys_id(cpu, apic_id))
+ break;
+ }
+ if (cpu >= nr_cpu_ids)
+ return -EINVAL;
+
+ cur_vmsa = per_cpu(snp_vmsa, cpu);
+
+ /*
+ * A new VMSA is created each time because there is no guarantee that
+ * the current VMSA is the kernels or that the vCPU is not running. If
+ * an attempt was done to use the current VMSA with a running vCPU, a
+ * #VMEXIT of that vCPU would wipe out all of the settings being done
+ * here.
+ */
+ vmsa = (struct sev_es_save_area *)snp_safe_alloc_page();
+ if (!vmsa)
+ return -ENOMEM;
+
+ /* CR4 should maintain the MCE value */
+ cr4 = native_read_cr4() & X86_CR4_MCE;
+
+ /* Set the CS value based on the start_ip converted to a SIPI vector */
+ sipi_vector = (start_ip >> 12);
+ vmsa->cs.base = sipi_vector << 12;
+ vmsa->cs.limit = 0xffff;
+ vmsa->cs.attrib = INIT_CS_ATTRIBS;
+ vmsa->cs.selector = sipi_vector << 8;
+
+ /* Set the RIP value based on start_ip */
+ vmsa->rip = start_ip & 0xfff;
+
+ /* Set VMSA entries to the INIT values as documented in the APM */
+ vmsa->ds.limit = 0xffff;
+ vmsa->ds.attrib = INIT_DS_ATTRIBS;
+ vmsa->es = vmsa->ds;
+ vmsa->fs = vmsa->ds;
+ vmsa->gs = vmsa->ds;
+ vmsa->ss = vmsa->ds;
+
+ vmsa->gdtr.limit = 0xffff;
+ vmsa->ldtr.limit = 0xffff;
+ vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
+ vmsa->idtr.limit = 0xffff;
+ vmsa->tr.limit = 0xffff;
+ vmsa->tr.attrib = INIT_TR_ATTRIBS;
+
+ vmsa->efer = 0x1000; /* Must set SVME bit */
+ vmsa->cr4 = cr4;
+ vmsa->cr0 = 0x60000010;
+ vmsa->dr7 = 0x400;
+ vmsa->dr6 = 0xffff0ff0;
+ vmsa->rflags = 0x2;
+ vmsa->g_pat = 0x0007040600070406ULL;
+ vmsa->xcr0 = 0x1;
+ vmsa->mxcsr = 0x1f80;
+ vmsa->x87_ftw = 0x5555;
+ vmsa->x87_fcw = 0x0040;
+
+ /*
+ * Set the SNP-specific fields for this VMSA:
+ * VMPL level
+ * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
+ */
+ vmsa->vmpl = 0;
+ vmsa->sev_features = sev_status >> 2;
+
+ /* Switch the page over to a VMSA page now that it is initialized */
+ ret = snp_set_vmsa(vmsa, true);
+ if (ret) {
+ pr_err("set VMSA page failed (%u)\n", ret);
+ free_page((unsigned long)vmsa);
+
+ return -EINVAL;
+ }
+
+ /* Issue VMGEXIT AP Creation NAE event */
+ local_irq_save(flags);
+
+ ghcb = __sev_get_ghcb(&state);
+
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_rax(ghcb, vmsa->sev_features);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
+ ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE);
+ ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
+ lower_32_bits(ghcb->save.sw_exit_info_1)) {
+ pr_alert("SNP AP Creation error\n");
+ ret = -EINVAL;
+ }
+
+ __sev_put_ghcb(&state);
+
+ local_irq_restore(flags);
+
+ /* Perform cleanup if there was an error */
+ if (ret) {
+ err = snp_set_vmsa(vmsa, false);
+ if (err)
+ pr_err("clear VMSA page failed (%u), leaking page\n", err);
+ else
+ free_page((unsigned long)vmsa);
+
+ vmsa = NULL;
+ }
+
+ /* Free up any previous VMSA page */
+ if (cur_vmsa) {
+ err = snp_set_vmsa(cur_vmsa, false);
+ if (err)
+ pr_err("clear VMSA page failed (%u), leaking page\n", err);
+ else
+ free_page((unsigned long)cur_vmsa);
+ }
+
+ /* Record the current VMSA page */
+ per_cpu(snp_vmsa, cpu) = vmsa;
+
+ return ret;
+}
+
+void snp_set_wakeup_secondary_cpu(void)
+{
+ if (!cc_platform_has(CC_ATTR_SEV_SNP))
+ return;
+
+ /*
+ * Always set this override if SEV-SNP is enabled. This makes it the
+ * required method to start APs under SEV-SNP. If the hypervisor does
+ * not support AP creation, then no APs will be started.
+ */
+ apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit;
+}
+
int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
u16 startup_cs, startup_ip;
@@ -82,6 +82,7 @@
#include <asm/spec-ctrl.h>
#include <asm/hw_irq.h>
#include <asm/stackprotector.h>
+#include <asm/sev.h>
#ifdef CONFIG_ACPI_CPPC_LIB
#include <acpi/cppc_acpi.h>
@@ -1419,6 +1420,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
smp_quirk_init_udelay();
speculative_store_bypass_ht_init();
+
+ snp_set_wakeup_secondary_cpu();
}
void arch_thaw_secondary_cpus_begin(void)