Message ID   | 20231016132819.1002933-7-michael.roth@amd.com (mailing list archive)
-------------|----------------------------------------------------------------------
State        | Not Applicable
Delegated to | Herbert Xu
Series       | Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support
On 10/16/23 08:27, Michael Roth wrote:
> From: Brijesh Singh <brijesh.singh@amd.com>
>
> The memory integrity guarantees of SEV-SNP are enforced through a new
> structure called the Reverse Map Table (RMP). The RMP is a single data
> structure shared across the system that contains one entry for every 4K
> page of DRAM that may be used by SEV-SNP VMs. APM2 section 15.36 details
> a number of steps needed to detect/enable SEV-SNP and RMP table support
> on the host:
>
>  - Detect SEV-SNP support based on CPUID bit
>  - Initialize the RMP table memory reported by the RMP base/end MSR
>    registers and configure IOMMU to be compatible with RMP access
>    restrictions
>  - Set the MtrrFixDramModEn bit in SYSCFG MSR
>  - Set the SecureNestedPagingEn and VMPLEn bits in the SYSCFG MSR
>  - Configure IOMMU
>
> RMP table entry format is non-architectural and it can vary by
> processor. It is defined by the PPR. Restrict SNP support to CPU
> models/families which are compatible with the current RMP table entry
> format to guard against any undefined behavior when running on other
> system types. Future models/support will handle this through an
> architectural mechanism to allow for broader compatibility.
>
> SNP host code depends on CONFIG_KVM_AMD_SEV config flag, which may be
> enabled even when CONFIG_AMD_MEM_ENCRYPT isn't set, so update the
> SNP-specific IOMMU helpers used here to rely on CONFIG_KVM_AMD_SEV
> instead of CONFIG_AMD_MEM_ENCRYPT.
>
> Co-developed-by: Ashish Kalra <ashish.kalra@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> Co-developed-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> [mdr: rework commit message to be clearer about what patch does, squash
>  in early_rmptable_check() handling from Tom]
> Signed-off-by: Michael Roth <michael.roth@amd.com>
> ---
>  arch/x86/Kbuild                          |   2 +
>  arch/x86/include/asm/disabled-features.h |   8 +-
>  arch/x86/include/asm/msr-index.h         |  11 +-
>  arch/x86/include/asm/sev.h               |   6 +
>  arch/x86/kernel/cpu/amd.c                |  19 ++
>  arch/x86/virt/svm/Makefile               |   3 +
>  arch/x86/virt/svm/sev.c                  | 239 +++++++++++++++++++++++
>  drivers/iommu/amd/init.c                 |   2 +-
>  include/linux/amd-iommu.h                |   2 +-
>  9 files changed, 288 insertions(+), 4 deletions(-)
>  create mode 100644 arch/x86/virt/svm/Makefile
>  create mode 100644 arch/x86/virt/svm/sev.c
>
> diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
> index 5a83da703e87..6a1f36df6a18 100644
> --- a/arch/x86/Kbuild
> +++ b/arch/x86/Kbuild
> @@ -28,5 +28,7 @@ obj-y += net/
>
>  obj-$(CONFIG_KEXEC_FILE) += purgatory/
>
> +obj-y += virt/svm/
> +
>  # for cleaning
>  subdir- += boot tools
> diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
> index 702d93fdd10e..83efd407033b 100644
> --- a/arch/x86/include/asm/disabled-features.h
> +++ b/arch/x86/include/asm/disabled-features.h
> @@ -117,6 +117,12 @@
>  #define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
>  #endif
>
> +#ifdef CONFIG_KVM_AMD_SEV
> +# define DISABLE_SEV_SNP 0
> +#else
> +# define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
> +#endif
> +
>  /*
>   * Make sure to add features to the correct mask
>   */
> @@ -141,7 +147,7 @@
>  			 DISABLE_ENQCMD)
>  #define DISABLED_MASK17 0
>  #define DISABLED_MASK18 (DISABLE_IBT)
> -#define DISABLED_MASK19 0
> +#define DISABLED_MASK19 (DISABLE_SEV_SNP)
>  #define DISABLED_MASK20 0
>  #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 1d111350197f..2be74afb4cbd 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -589,6 +589,8 @@
>  #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
>  #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
>  #define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
> +#define MSR_AMD64_RMP_BASE 0xc0010132
> +#define MSR_AMD64_RMP_END 0xc0010133
>
>  /* SNP feature bits enabled by the hypervisor */
>  #define MSR_AMD64_SNP_VTOM BIT_ULL(3)
> @@ -690,7 +692,14 @@
>  #define MSR_K8_TOP_MEM2 0xc001001d
>  #define MSR_AMD64_SYSCFG 0xc0010010
>  #define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
> -#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
> +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
> +#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24
> +#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
> +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25
> +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
> +#define MSR_AMD64_SYSCFG_MFDM_BIT 19
> +#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
> +
>  #define MSR_K8_INT_PENDING_MSG 0xc0010055
>  /* C1E active bits in int pending message */
>  #define K8_INTP_C1E_ACTIVE_MASK 0x18000000
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index 5b4a1ce3d368..b05fcd0ab7e4 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -243,4 +243,10 @@ static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
>  static inline u64 sev_get_status(void) { return 0; }
>  #endif
>
> +#ifdef CONFIG_KVM_AMD_SEV
> +bool snp_get_rmptable_info(u64 *start, u64 *len);
> +#else
> +static inline bool snp_get_rmptable_info(u64 *start, u64 *len) { return false; }
> +#endif
> +
>  #endif
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index 14ee7f750cc7..6cc2074fcea3 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -20,6 +20,7 @@
>  #include <asm/delay.h>
>  #include <asm/debugreg.h>
>  #include <asm/resctrl.h>
> +#include <asm/sev.h>
>
>  #ifdef CONFIG_X86_64
>  # include <asm/mmconfig.h>
> @@ -618,6 +619,20 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
>  	resctrl_cpu_detect(c);
>  }
>
> +static bool early_rmptable_check(void)
> +{
> +	u64 rmp_base, rmp_size;
> +
> +	/*
> +	 * For early BSP initialization, max_pfn won't be set up yet, wait until
> +	 * it is set before performing the RMP table calculations.
> +	 */
> +	if (!max_pfn)
> +		return true;

To make this so that AutoIBRS isn't disabled should an RMP table not
have been allocated by BIOS, let's delete the above check. It then
becomes just a check for whether the RMP table has been allocated by
BIOS, enabled by selecting a BIOS option, which shows intent for running
SNP guests. This way, the AutoIBRS mitigation can be used if SNP is not
possible on the system.

Thanks,
Tom

> +
> +	return snp_get_rmptable_info(&rmp_base, &rmp_size);
> +}
> +
>  static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>  {
>  	u64 msr;
> @@ -659,6 +674,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>  	if (!(msr & MSR_K7_HWCR_SMMLOCK))
>  		goto clear_sev;
>
> +	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
> +		goto clear_snp;
> +
>  	return;
>
>  clear_all:
> @@ -666,6 +684,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>  clear_sev:
>  	setup_clear_cpu_cap(X86_FEATURE_SEV);
>  	setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
> +clear_snp:
>  	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
>  	}
>  }
> diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile
> new file mode 100644
> index 000000000000..ef2a31bdcc70
> --- /dev/null
> +++ b/arch/x86/virt/svm/Makefile
> @@ -0,0 +1,3 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +obj-$(CONFIG_KVM_AMD_SEV) += sev.o
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> new file mode 100644
> index 000000000000..8b9ed72489e4
> --- /dev/null
> +++ b/arch/x86/virt/svm/sev.c
> @@ -0,0 +1,239 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * AMD SVM-SEV Host Support.
> + *
> + * Copyright (C) 2023 Advanced Micro Devices, Inc.
> + *
> + * Author: Ashish Kalra <ashish.kalra@amd.com>
> + *
> + */
> +
> +#include <linux/cc_platform.h>
> +#include <linux/printk.h>
> +#include <linux/mm_types.h>
> +#include <linux/set_memory.h>
> +#include <linux/memblock.h>
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/cpumask.h>
> +#include <linux/iommu.h>
> +#include <linux/amd-iommu.h>
> +
> +#include <asm/sev.h>
> +#include <asm/processor.h>
> +#include <asm/setup.h>
> +#include <asm/svm.h>
> +#include <asm/smp.h>
> +#include <asm/cpu.h>
> +#include <asm/apic.h>
> +#include <asm/cpuid.h>
> +#include <asm/cmdline.h>
> +#include <asm/iommu.h>
> +
> +/*
> + * The RMP entry format is not architectural. The format is defined in PPR
> + * Family 19h Model 01h, Rev B1 processor.
> + */
> +struct rmpentry {
> +	u64 assigned	: 1,
> +	    pagesize	: 1,
> +	    immutable	: 1,
> +	    rsvd1	: 9,
> +	    gpa		: 39,
> +	    asid	: 10,
> +	    vmsa	: 1,
> +	    validated	: 1,
> +	    rsvd2	: 1;
> +	u64 rsvd3;
> +} __packed;
> +
> +/*
> + * The first 16KB from the RMP_BASE is used by the processor for the
> + * bookkeeping, the range needs to be added during the RMP entry lookup.
> + */
> +#define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000
> +
> +static struct rmpentry *rmptable_start __ro_after_init;
> +static u64 rmptable_max_pfn __ro_after_init;
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt) "SEV-SNP: " fmt
> +
> +static int __mfd_enable(unsigned int cpu)
> +{
> +	u64 val;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return 0;
> +
> +	rdmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	val |= MSR_AMD64_SYSCFG_MFDM;
> +
> +	wrmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	return 0;
> +}
> +
> +static __init void mfd_enable(void *arg)
> +{
> +	__mfd_enable(smp_processor_id());
> +}
> +
> +static int __snp_enable(unsigned int cpu)
> +{
> +	u64 val;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return 0;
> +
> +	rdmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	val |= MSR_AMD64_SYSCFG_SNP_EN;
> +	val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
> +
> +	wrmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	return 0;
> +}
> +
> +static __init void snp_enable(void *arg)
> +{
> +	__snp_enable(smp_processor_id());
> +}
> +
> +#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
> +
> +bool snp_get_rmptable_info(u64 *start, u64 *len)
> +{
> +	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
> +
> +	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
> +	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
> +
> +	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
> +		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
> +		return false;
> +	}
> +
> +	if (rmp_base > rmp_end) {
> +		pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
> +		return false;
> +	}
> +
> +	rmp_sz = rmp_end - rmp_base + 1;
> +
> +	/*
> +	 * Calculate the amount the memory that must be reserved by the BIOS to
> +	 * address the whole RAM, including the bookkeeping area. The RMP itself
> +	 * must also be covered.
> +	 */
> +	max_rmp_pfn = max_pfn;
> +	if (PHYS_PFN(rmp_end) > max_pfn)
> +		max_rmp_pfn = PHYS_PFN(rmp_end);
> +
> +	calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
> +
> +	if (calc_rmp_sz > rmp_sz) {
> +		pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
> +		       calc_rmp_sz, rmp_sz);
> +		return false;
> +	}
> +
> +	*start = rmp_base;
> +	*len = rmp_sz;
> +
> +	return true;
> +}
> +
> +static __init int __snp_rmptable_init(void)
> +{
> +	u64 rmp_base, rmp_size;
> +	void *rmp_start;
> +	u64 val;
> +
> +	if (!snp_get_rmptable_info(&rmp_base, &rmp_size))
> +		return 1;
> +
> +	pr_info("RMP table physical address [0x%016llx - 0x%016llx]\n",
> +		rmp_base, rmp_base + rmp_size - 1);
> +
> +	rmp_start = memremap(rmp_base, rmp_size, MEMREMAP_WB);
> +	if (!rmp_start) {
> +		pr_err("Failed to map RMP table addr 0x%llx size 0x%llx\n", rmp_base, rmp_size);
> +		return 1;
> +	}
> +
> +	/*
> +	 * Check if SEV-SNP is already enabled, this can happen in case of
> +	 * kexec boot.
> +	 */
> +	rdmsrl(MSR_AMD64_SYSCFG, val);
> +	if (val & MSR_AMD64_SYSCFG_SNP_EN)
> +		goto skip_enable;
> +
> +	/* Initialize the RMP table to zero */
> +	memset(rmp_start, 0, rmp_size);
> +
> +	/* Flush the caches to ensure that data is written before SNP is enabled. */
> +	wbinvd_on_all_cpus();
> +
> +	/* MFDM must be enabled on all the CPUs prior to enabling SNP. */
> +	on_each_cpu(mfd_enable, NULL, 1);
> +
> +	/* Enable SNP on all CPUs. */
> +	on_each_cpu(snp_enable, NULL, 1);
> +
> +skip_enable:
> +	rmp_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
> +	rmp_size -= RMPTABLE_CPU_BOOKKEEPING_SZ;
> +
> +	rmptable_start = (struct rmpentry *)rmp_start;
> +	rmptable_max_pfn = rmp_size / sizeof(struct rmpentry) - 1;
> +
> +	return 0;
> +}
> +
> +static int __init snp_rmptable_init(void)
> +{
> +	int family, model;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return 0;
> +
> +	family = boot_cpu_data.x86;
> +	model = boot_cpu_data.x86_model;
> +
> +	/*
> +	 * RMP table entry format is not architectural and it can vary by processor and
> +	 * is defined by the per-processor PPR. Restrict SNP support on the known CPU
> +	 * model and family for which the RMP table entry format is currently defined for.
> +	 */
> +	if (family != 0x19 || model > 0xaf)
> +		goto nosnp;
> +
> +	if (amd_iommu_snp_enable())
> +		goto nosnp;
> +
> +	if (__snp_rmptable_init())
> +		goto nosnp;
> +
> +	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
> +
> +	return 0;
> +
> +nosnp:
> +	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
> +	return -ENOSYS;
> +}
> +
> +/*
> + * This must be called after the PCI subsystem. This is because amd_iommu_snp_enable()
> + * is called to ensure the IOMMU supports the SEV-SNP feature, which can only be
> + * called after subsys_initcall().
> + *
> + * NOTE: IOMMU is enforced by SNP to ensure that hypervisor cannot program DMA
> + * directly into guest private memory. In case of SNP, the IOMMU ensures that
> + * the page(s) used for DMA are hypervisor owned.
> + */
> +fs_initcall(snp_rmptable_init);
> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
> index 45efb7e5d725..1c9924de607a 100644
> --- a/drivers/iommu/amd/init.c
> +++ b/drivers/iommu/amd/init.c
> @@ -3802,7 +3802,7 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64
>  	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
>  }
>
> -#ifdef CONFIG_AMD_MEM_ENCRYPT
> +#ifdef CONFIG_KVM_AMD_SEV
>  int amd_iommu_snp_enable(void)
>  {
>  	/*
> diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
> index 99a5201d9e62..55fc03cb3968 100644
> --- a/include/linux/amd-iommu.h
> +++ b/include/linux/amd-iommu.h
> @@ -205,7 +205,7 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn,
>  				u64 *value);
>  struct amd_iommu *get_amd_iommu(unsigned int idx);
>
> -#ifdef CONFIG_AMD_MEM_ENCRYPT
> +#ifdef CONFIG_KVM_AMD_SEV
>  int amd_iommu_snp_enable(void);
>  #endif
>
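As background for the reviews that follow: with the structures the patch
introduces, an RMP entry lookup reduces to plain array indexing, since
rmptable_start is advanced past the 16KB bookkeeping area during init. A
minimal sketch using the patch's names (the helper itself is
hypothetical, not part of the series):

	/*
	 * Hypothetical lookup helper: rmptable_start already points past
	 * the RMPTABLE_CPU_BOOKKEEPING_SZ prefix, so a system PFN indexes
	 * the entry array directly.
	 */
	static struct rmpentry *snp_lookup_rmpentry_sketch(u64 pfn)
	{
		if (!rmptable_start || pfn > rmptable_max_pfn)
			return NULL;

		return &rmptable_start[pfn];
	}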
On Mon, Oct 16, 2023 at 08:27:35AM -0500, Michael Roth wrote:
> +static bool early_rmptable_check(void)
> +{
> +	u64 rmp_base, rmp_size;
> +
> +	/*
> +	 * For early BSP initialization, max_pfn won't be set up yet, wait until
> +	 * it is set before performing the RMP table calculations.
> +	 */
> +	if (!max_pfn)
> +		return true;

This already says that this is called at the wrong point during init.

Right now we have

	early_identify_cpu -> early_init_amd -> early_detect_mem_encrypt

which runs only on the BSP but then early_init_amd() is called in
init_amd() too so that it takes care of the APs too.

Which ends up doing a lot of unnecessary work on each AP in
early_detect_mem_encrypt() like calculating the RMP size on each AP
unnecessarily where this needs to happen exactly once.

Is there any reason why this function cannot be moved to init_amd()
where it'll do the normal, per-AP init?

And the stuff that needs to happen once, needs to be called once too.

> +
> +	return snp_get_rmptable_info(&rmp_base, &rmp_size);
> +}
> +
>  static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>  {
>  	u64 msr;
> @@ -659,6 +674,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>  	if (!(msr & MSR_K7_HWCR_SMMLOCK))
>  		goto clear_sev;
>
> +	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
> +		goto clear_snp;
> +
>  	return;
>
>  clear_all:
> @@ -666,6 +684,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>  clear_sev:
>  	setup_clear_cpu_cap(X86_FEATURE_SEV);
>  	setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
> +clear_snp:
>  	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
>  	}
>  }

...

> +bool snp_get_rmptable_info(u64 *start, u64 *len)
> +{
> +	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
> +
> +	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
> +	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
> +
> +	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
> +		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
> +		return false;
> +	}

If you're masking off bits 0-12 above...

> +
> +	if (rmp_base > rmp_end) {

... why aren't you using the masked out vars further on?

I know, the hw will say, yeah, those bits are 0 but still. IOW, do:

	rmp_base &= RMP_ADDR_MASK;
	rmp_end  &= RMP_ADDR_MASK;

after reading them.

> +		pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
> +		return false;
> +	}
> +
> +	rmp_sz = rmp_end - rmp_base + 1;
> +
> +	/*
> +	 * Calculate the amount the memory that must be reserved by the BIOS to
> +	 * address the whole RAM, including the bookkeeping area. The RMP itself
> +	 * must also be covered.
> +	 */
> +	max_rmp_pfn = max_pfn;
> +	if (PHYS_PFN(rmp_end) > max_pfn)
> +		max_rmp_pfn = PHYS_PFN(rmp_end);
> +
> +	calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
> +
> +	if (calc_rmp_sz > rmp_sz) {
> +		pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
> +		       calc_rmp_sz, rmp_sz);
> +		return false;
> +	}
> +
> +	*start = rmp_base;
> +	*len = rmp_sz;
> +
> +	return true;
> +}
> +
> +static __init int __snp_rmptable_init(void)
> +{
> +	u64 rmp_base, rmp_size;
> +	void *rmp_start;
> +	u64 val;
> +
> +	if (!snp_get_rmptable_info(&rmp_base, &rmp_size))
> +		return 1;
> +
> +	pr_info("RMP table physical address [0x%016llx - 0x%016llx]\n",

That's "RMP table physical range"

> +		rmp_base, rmp_base + rmp_size - 1);
> +
> +	rmp_start = memremap(rmp_base, rmp_size, MEMREMAP_WB);
> +	if (!rmp_start) {
> +		pr_err("Failed to map RMP table addr 0x%llx size 0x%llx\n", rmp_base, rmp_size);

No need to dump rmp_base and rmp_size again here - you're dumping them
above.

> +		return 1;
> +	}
> +
> +	/*
> +	 * Check if SEV-SNP is already enabled, this can happen in case of
> +	 * kexec boot.
> +	 */
> +	rdmsrl(MSR_AMD64_SYSCFG, val);
> +	if (val & MSR_AMD64_SYSCFG_SNP_EN)
> +		goto skip_enable;
> +
> +	/* Initialize the RMP table to zero */

Again: useless comment.

> +	memset(rmp_start, 0, rmp_size);
> +
> +	/* Flush the caches to ensure that data is written before SNP is enabled. */
> +	wbinvd_on_all_cpus();
> +
> +	/* MFDM must be enabled on all the CPUs prior to enabling SNP. */

First of all, use the APM bit name here pls: MtrrFixDramModEn.

And then, for the life of me, I can't find any mention in the APM why
this bit is needed. Neither in "15.36.2 Enabling SEV-SNP" nor in
"15.34.3 Enabling SEV".

Looking at the bit definitions of WrMem and RdMem - read and write
requests get directed to system memory instead of MMIO so I guess you
don't want to be able to write MMIO for certain physical ranges when SNP
is enabled but it'll be good to have this properly explained instead of
a "this must happen" information-less sentence.

> +	on_each_cpu(mfd_enable, NULL, 1);
> +
> +	/* Enable SNP on all CPUs. */

Useless comment.

> +	on_each_cpu(snp_enable, NULL, 1);
> +
> +skip_enable:
> +	rmp_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
> +	rmp_size -= RMPTABLE_CPU_BOOKKEEPING_SZ;
> +
> +	rmptable_start = (struct rmpentry *)rmp_start;
> +	rmptable_max_pfn = rmp_size / sizeof(struct rmpentry) - 1;
> +
> +	return 0;
> +}
> +
> +static int __init snp_rmptable_init(void)
> +{
> +	int family, model;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return 0;
> +
> +	family = boot_cpu_data.x86;
> +	model = boot_cpu_data.x86_model;

Looks useless - just use boot_cpu_data directly below.

As mentioned here already
https://lore.kernel.org/all/Y9ubi0i4Z750gdMm@zn.tnic/

And I already mentioned that for v9:

https://lore.kernel.org/r/20230621094236.GZZJLGDAicp1guNPvD@fat_crate.local

Next time I'm NAKing this patch until you incorporate all review
comments or you give a technical reason why you disagree with them.

> +	/*
> +	 * RMP table entry format is not architectural and it can vary by processor and
> +	 * is defined by the per-processor PPR. Restrict SNP support on the known CPU
> +	 * model and family for which the RMP table entry format is currently defined for.
> +	 */
> +	if (family != 0x19 || model > 0xaf)
> +		goto nosnp;
> +
> +	if (amd_iommu_snp_enable())
> +		goto nosnp;
> +
> +	if (__snp_rmptable_init())
> +		goto nosnp;
> +
> +	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
> +
> +	return 0;
> +
> +nosnp:
> +	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
> +	return -ENOSYS;
> +}
> +
> +/*
> + * This must be called after the PCI subsystem. This is because amd_iommu_snp_enable()
> + * is called to ensure the IOMMU supports the SEV-SNP feature, which can only be
> + * called after subsys_initcall().
> + *
> + * NOTE: IOMMU is enforced by SNP to ensure that hypervisor cannot program DMA
> + * directly into guest private memory. In case of SNP, the IOMMU ensures that
> + * the page(s) used for DMA are hypervisor owned.
> + */
> +fs_initcall(snp_rmptable_init);

This looks backwards. AFAICT, the IOMMU code should call arch code to
enable SNP at the right time, not the other way around - arch code
calling driver code.

Especially if the SNP table enablement depends on some exact IOMMU
init_state:

	if (init_state > IOMMU_ENABLED) {
		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
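For context on the calc_rmp_sz check being discussed: each 4K page of
DRAM needs one 16-byte struct rmpentry, which is where the
"max_rmp_pfn << 4" comes from, plus the fixed 16KB bookkeeping prefix. A
worked example with illustrative numbers (not taken from the thread):

	/*
	 * 1 TiB of RAM as an example:
	 *   max_pfn     = 1 TiB / 4 KiB     = 0x10000000 pages
	 *   entries     = 0x10000000 * 16 B = 4 GiB   (max_rmp_pfn << 4)
	 *   calc_rmp_sz = 4 GiB + 0x4000    (RMPTABLE_CPU_BOOKKEEPING_SZ)
	 *
	 * and if the RMP table itself ends above max_pfn, max_rmp_pfn is
	 * bumped so the RMP's own pages are covered too.
	 */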
On 11/7/23 10:31, Borislav Petkov wrote:
> On Mon, Oct 16, 2023 at 08:27:35AM -0500, Michael Roth wrote:
>> +static bool early_rmptable_check(void)
>> +{
>> +	u64 rmp_base, rmp_size;
>> +
>> +	/*
>> +	 * For early BSP initialization, max_pfn won't be set up yet, wait until
>> +	 * it is set before performing the RMP table calculations.
>> +	 */
>> +	if (!max_pfn)
>> +		return true;
>
> This already says that this is called at the wrong point during init.

(Just addressing some of your comments, Mike to address others)

I commented earlier that we can remove this check and then it becomes
purely a check for whether the RMP table has been pre-allocated by the
BIOS. It is done early here in order to allow for AutoIBRS to be used as
a Spectre mitigation. If an RMP table has not been allocated by BIOS
then the SNP feature can be cleared, allowing AutoIBRS to be used, if
available.

> Right now we have
>
> 	early_identify_cpu -> early_init_amd -> early_detect_mem_encrypt
>
> which runs only on the BSP but then early_init_amd() is called in
> init_amd() too so that it takes care of the APs too.
>
> Which ends up doing a lot of unnecessary work on each AP in
> early_detect_mem_encrypt() like calculating the RMP size on each AP
> unnecessarily where this needs to happen exactly once.
>
> Is there any reason why this function cannot be moved to init_amd()
> where it'll do the normal, per-AP init?

It needs to be called early enough to allow for AutoIBRS to not be
disabled just because SNP is supported. By calling it where it is
currently called, the SNP feature can be cleared if, even though
supported, SNP can't be used, allowing AutoIBRS to be used as a more
performant Spectre mitigation.

>
> And the stuff that needs to happen once, needs to be called once too.
>
>> +
>> +	return snp_get_rmptable_info(&rmp_base, &rmp_size);
>> +}
>> +
>>  static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>  {
>>  	u64 msr;
>> @@ -659,6 +674,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>  	if (!(msr & MSR_K7_HWCR_SMMLOCK))
>>  		goto clear_sev;
>>
>> +	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
>> +		goto clear_snp;
>> +
>>  	return;
>>
>>  clear_all:
>> @@ -666,6 +684,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>  clear_sev:
>>  	setup_clear_cpu_cap(X86_FEATURE_SEV);
>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
>> +clear_snp:
>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
>>  	}
>>  }
>
> ...
>
>> +bool snp_get_rmptable_info(u64 *start, u64 *len)
>> +{
>> +	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
>> +
>> +	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
>> +	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
>> +
>> +	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
>> +		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
>> +		return false;
>> +	}
>
> If you're masking off bits 0-12 above...

Because the RMP_END MSR, most specifically, has a default value of
0x1fff, where bits [12:0] are reserved. So to specifically check if the
MSR has been set to a non-zero end value, the bits are masked off.
However, ...

>
>> +
>> +	if (rmp_base > rmp_end) {
>
> ... why aren't you using the masked out vars further on?

... the full values can be used once they have been determined to not
be zero.

>
> I know, the hw will say, yeah, those bits are 0 but still. IOW, do:
>
> 	rmp_base &= RMP_ADDR_MASK;
> 	rmp_end  &= RMP_ADDR_MASK;
>
> after reading them.

You can't for RMP_END since it will always have bits 12:0 set to one
and you shouldn't clear them once you know that the MSR has truly been
set.

Thanks,
Tom
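One way to reconcile the two points above is to mask only for the "did
BIOS program anything" test and keep the raw RMP_END value for the range
arithmetic. A sketch under those assumptions (hypothetical, not the
patch as posted):

	static bool snp_probe_rmptable_sketch(u64 *start, u64 *len)
	{
		u64 rmp_base, rmp_end;

		rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
		rdmsrl(MSR_AMD64_RMP_END, rmp_end);

		/* Masked test only: has BIOS reserved an RMP table at all? */
		if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK))
			return false;

		*start = rmp_base & RMP_ADDR_MASK;
		/* Keep the raw end value: per the above, bits 12:0 read as set. */
		*len   = rmp_end - *start + 1;

		return true;
	}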
Hello Boris,

Addressing some of the remaining comments:

On 11/7/2023 10:31 AM, Borislav Petkov wrote:
> On Mon, Oct 16, 2023 at 08:27:35AM -0500, Michael Roth wrote:
>> +static bool early_rmptable_check(void)
>> +{
>> +	u64 rmp_base, rmp_size;
>> +
>> +	/*
>> +	 * For early BSP initialization, max_pfn won't be set up yet, wait until
>> +	 * it is set before performing the RMP table calculations.
>> +	 */
>> +	if (!max_pfn)
>> +		return true;
>
> This already says that this is called at the wrong point during init.
>
> Right now we have
>
> 	early_identify_cpu -> early_init_amd -> early_detect_mem_encrypt
>
> which runs only on the BSP but then early_init_amd() is called in
> init_amd() too so that it takes care of the APs too.
>
> Which ends up doing a lot of unnecessary work on each AP in
> early_detect_mem_encrypt() like calculating the RMP size on each AP
> unnecessarily where this needs to happen exactly once.
>
> Is there any reason why this function cannot be moved to init_amd()
> where it'll do the normal, per-AP init?
>
> And the stuff that needs to happen once, needs to be called once too.
>
>> +
>> +	return snp_get_rmptable_info(&rmp_base, &rmp_size);
>> +}
>> +
>>  static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>  {
>>  	u64 msr;
>> @@ -659,6 +674,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>  	if (!(msr & MSR_K7_HWCR_SMMLOCK))
>>  		goto clear_sev;
>>
>> +	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
>> +		goto clear_snp;
>> +
>>  	return;
>>
>>  clear_all:
>> @@ -666,6 +684,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>  clear_sev:
>>  	setup_clear_cpu_cap(X86_FEATURE_SEV);
>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
>> +clear_snp:
>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
>>  	}
>>  }
>
> ...
>
>> +bool snp_get_rmptable_info(u64 *start, u64 *len)
>> +{
>> +	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
>> +
>> +	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
>> +	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
>> +
>> +	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
>> +		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
>> +		return false;
>> +	}
>
> If you're masking off bits 0-12 above...
>
>> +
>> +	if (rmp_base > rmp_end) {
>
> ... why aren't you using the masked out vars further on?
>
> I know, the hw will say, yeah, those bits are 0 but still. IOW, do:
>
> 	rmp_base &= RMP_ADDR_MASK;
> 	rmp_end  &= RMP_ADDR_MASK;
>
> after reading them.
>
>> +		pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
>> +		return false;
>> +	}
>> +
>> +	rmp_sz = rmp_end - rmp_base + 1;
>> +
>> +	/*
>> +	 * Calculate the amount the memory that must be reserved by the BIOS to
>> +	 * address the whole RAM, including the bookkeeping area. The RMP itself
>> +	 * must also be covered.
>> +	 */
>> +	max_rmp_pfn = max_pfn;
>> +	if (PHYS_PFN(rmp_end) > max_pfn)
>> +		max_rmp_pfn = PHYS_PFN(rmp_end);
>> +
>> +	calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
>> +
>> +	if (calc_rmp_sz > rmp_sz) {
>> +		pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
>> +		       calc_rmp_sz, rmp_sz);
>> +		return false;
>> +	}
>> +
>> +	*start = rmp_base;
>> +	*len = rmp_sz;
>> +
>> +	return true;
>> +}
>> +
>> +static __init int __snp_rmptable_init(void)
>> +{
>> +	u64 rmp_base, rmp_size;
>> +	void *rmp_start;
>> +	u64 val;
>> +
>> +	if (!snp_get_rmptable_info(&rmp_base, &rmp_size))
>> +		return 1;
>> +
>> +	pr_info("RMP table physical address [0x%016llx - 0x%016llx]\n",
>
> That's "RMP table physical range"
>
>> +		rmp_base, rmp_base + rmp_size - 1);
>> +
>> +	rmp_start = memremap(rmp_base, rmp_size, MEMREMAP_WB);
>> +	if (!rmp_start) {
>> +		pr_err("Failed to map RMP table addr 0x%llx size 0x%llx\n", rmp_base, rmp_size);
>
> No need to dump rmp_base and rmp_size again here - you're dumping them
> above.
>
>> +		return 1;
>> +	}
>> +
>> +	/*
>> +	 * Check if SEV-SNP is already enabled, this can happen in case of
>> +	 * kexec boot.
>> +	 */
>> +	rdmsrl(MSR_AMD64_SYSCFG, val);
>> +	if (val & MSR_AMD64_SYSCFG_SNP_EN)
>> +		goto skip_enable;
>> +
>> +	/* Initialize the RMP table to zero */
>
> Again: useless comment.
>
>> +	memset(rmp_start, 0, rmp_size);
>> +
>> +	/* Flush the caches to ensure that data is written before SNP is enabled. */
>> +	wbinvd_on_all_cpus();
>> +
>> +	/* MFDM must be enabled on all the CPUs prior to enabling SNP. */
>
> First of all, use the APM bit name here pls: MtrrFixDramModEn.
>
> And then, for the life of me, I can't find any mention in the APM why
> this bit is needed. Neither in "15.36.2 Enabling SEV-SNP" nor in
> "15.34.3 Enabling SEV".
>
> Looking at the bit definitions of WrMem and RdMem - read and write
> requests get directed to system memory instead of MMIO so I guess you
> don't want to be able to write MMIO for certain physical ranges when SNP
> is enabled but it'll be good to have this properly explained instead of
> a "this must happen" information-less sentence.

This is a prerequisite for SNP_INIT as per the SNP Firmware ABI
specifications, section 8.8.2:

From the SNP FW ABI specs:

If INIT_RMP is 1, then the firmware ensures the following system
requirements are met:

• SYSCFG[MemoryEncryptionModEn] must be set to 1 across all cores. (SEV
  must be enabled.)
• SYSCFG[SecureNestedPagingEn] must be set to 1 across all cores.
• SYSCFG[VMPLEn] must be set to 1 across all cores.
• SYSCFG[MFDM] must be set to 1 across all cores.
• VM_HSAVE_PA (MSR C001_0117) must be set to 0h across all cores.
• HWCR[SmmLock] (MSR C001_0015) must be set to 1 across all cores.

So, this platform enabling code for SNP needs to ensure that these
conditions are met before SNP_INIT is called.

>
>> +	on_each_cpu(mfd_enable, NULL, 1);
>> +
>> +	/* Enable SNP on all CPUs. */
>
> Useless comment.
>
>> +	on_each_cpu(snp_enable, NULL, 1);
>> +
>> +skip_enable:
>> +	rmp_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
>> +	rmp_size -= RMPTABLE_CPU_BOOKKEEPING_SZ;
>> +
>> +	rmptable_start = (struct rmpentry *)rmp_start;
>> +	rmptable_max_pfn = rmp_size / sizeof(struct rmpentry) - 1;
>> +
>> +	return 0;
>> +}
>> +
>> +static int __init snp_rmptable_init(void)
>> +{
>> +	int family, model;
>> +
>> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
>> +		return 0;
>> +
>> +	family = boot_cpu_data.x86;
>> +	model = boot_cpu_data.x86_model;
>
> Looks useless - just use boot_cpu_data directly below.
>
> As mentioned here already
> https://lore.kernel.org/all/Y9ubi0i4Z750gdMm@zn.tnic/
>
> And I already mentioned that for v9:
>
> https://lore.kernel.org/r/20230621094236.GZZJLGDAicp1guNPvD@fat_crate.local
>
> Next time I'm NAKing this patch until you incorporate all review
> comments or you give a technical reason why you disagree with them.
>
>> +	/*
>> +	 * RMP table entry format is not architectural and it can vary by processor and
>> +	 * is defined by the per-processor PPR. Restrict SNP support on the known CPU
>> +	 * model and family for which the RMP table entry format is currently defined for.
>> +	 */
>> +	if (family != 0x19 || model > 0xaf)
>> +		goto nosnp;
>> +
>> +	if (amd_iommu_snp_enable())
>> +		goto nosnp;
>> +
>> +	if (__snp_rmptable_init())
>> +		goto nosnp;
>> +
>> +	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
>> +
>> +	return 0;
>> +
>> +nosnp:
>> +	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
>> +	return -ENOSYS;
>> +}
>> +
>> +/*
>> + * This must be called after the PCI subsystem. This is because amd_iommu_snp_enable()
>> + * is called to ensure the IOMMU supports the SEV-SNP feature, which can only be
>> + * called after subsys_initcall().
>> + *
>> + * NOTE: IOMMU is enforced by SNP to ensure that hypervisor cannot program DMA
>> + * directly into guest private memory. In case of SNP, the IOMMU ensures that
>> + * the page(s) used for DMA are hypervisor owned.
>> + */
>> +fs_initcall(snp_rmptable_init);
>
> This looks backwards. AFAICT, the IOMMU code should call arch code to
> enable SNP at the right time, not the other way around - arch code
> calling driver code.
>
> Especially if the SNP table enablement depends on some exact IOMMU
> init_state:
>
> 	if (init_state > IOMMU_ENABLED) {
> 		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
>

This is again as per SNP_INIT requirements, that SNP support is enabled
in the IOMMU before SNP_INIT is done. The above function
snp_rmptable_init() only calls the IOMMU driver to enable SNP support
when it has detected and enabled platform support for SNP.

It is not that the IOMMU driver has to call the arch code to enable SNP
at the right time but it is the other way around: once platform support
for SNP is enabled then the IOMMU driver has to be called to enable the
same for the IOMMU, and this needs to be done before the CCP driver is
loaded and does SNP_INIT.

Thanks,
Ashish
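Given the firmware ABI list quoted above, a pre-SNP_INIT sanity check
could be expressed along these lines (hypothetical helper, only to
illustrate the per-core SYSCFG requirements; the real enforcement is
done by the firmware at SNP_INIT time):

	static void snp_check_syscfg(void *unused)
	{
		u64 val, want = MSR_AMD64_SYSCFG_MEM_ENCRYPT |
				MSR_AMD64_SYSCFG_SNP_EN |
				MSR_AMD64_SYSCFG_SNP_VMPL_EN |
				MSR_AMD64_SYSCFG_MFDM;

		rdmsrl(MSR_AMD64_SYSCFG, val);
		if ((val & want) != want)
			pr_warn("CPU%d: SYSCFG not fully set up for SNP_INIT: %#llx\n",
				smp_processor_id(), val);
	}

	/* e.g.: on_each_cpu(snp_check_syscfg, NULL, 1); before SNP_INIT */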
On Tue, Nov 07, 2023 at 12:32:58PM -0600, Tom Lendacky wrote:
> It needs to be called early enough to allow for AutoIBRS to not be disabled
> just because SNP is supported. By calling it where it is currently called,
> the SNP feature can be cleared if, even though supported, SNP can't be used,
> allowing AutoIBRS to be used as a more performant Spectre mitigation.

So far so good. However,

	early_rmptable_check -> snp_get_rmptable_info

is unnecessary work which happens on every AP for no reason whatsoever.

That's reading RMP_BASE and RMP_END, doing the same checks which it did
on the BSP and then throwing away the computed rmp_base and rmp_sz, all
once per AP.

I don't mind doing early work which needs to be done only once. I mind
doing work which needs to be done only once, on every AP.

Thx.
On Tue, Nov 07, 2023 at 01:00:00PM -0600, Kalra, Ashish wrote:
> > First of all, use the APM bit name here pls: MtrrFixDramModEn.
> >
> > And then, for the life of me, I can't find any mention in the APM why
> > this bit is needed. Neither in "15.36.2 Enabling SEV-SNP" nor in
> > "15.34.3 Enabling SEV".
> >
> > Looking at the bit definitions of WrMem and RdMem - read and write
> > requests get directed to system memory instead of MMIO so I guess you
> > don't want to be able to write MMIO for certain physical ranges when SNP
> > is enabled but it'll be good to have this properly explained instead of
> > a "this must happen" information-less sentence.
>
> This is a prerequisite for SNP_INIT as per the SNP Firmware ABI
> specifications, section 8.8.2:

Did you even read the text you're responding to?

> > This looks backwards. AFAICT, the IOMMU code should call arch code to
> > enable SNP at the right time, not the other way around - arch code
> > calling driver code.
> >
> > Especially if the SNP table enablement depends on some exact IOMMU
> > init_state:
> >
> > 	if (init_state > IOMMU_ENABLED) {
> > 		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
>
> This is again as per SNP_INIT requirements, that SNP support is enabled in
> the IOMMU before SNP_INIT is done. The above function snp_rmptable_init()
> only calls the IOMMU driver to enable SNP support when it has detected and
> enabled platform support for SNP.
>
> It is not that IOMMU driver has to call the arch code to enable SNP at the
> right time but it is the other way around that once platform support for SNP
> is enabled then the IOMMU driver has to be called to enable the same for the
> IOMMU and this needs to be done before the CCP driver is loaded and does
> SNP_INIT.

You again didn't read the text you're responding to.

Arch code does not call drivers - arch code sets up the arch and
provides facilities which the drivers use.
On Tue, Nov 07, 2023 at 08:19:31PM +0100, Borislav Petkov wrote:
> Arch code does not call drivers - arch code sets up the arch and
> provides facilities which the drivers use.

IOW (just an example diff):

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 1c9924de607a..00cdbc844961 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3290,6 +3290,7 @@ static int __init state_next(void)
 		break;
 	case IOMMU_ENABLED:
 		register_syscore_ops(&amd_iommu_syscore_ops);
+		amd_iommu_snp_enable();
 		ret = amd_iommu_init_pci();
 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
 		break;
@@ -3814,16 +3815,6 @@ int amd_iommu_snp_enable(void)
 		return -EINVAL;
 	}
 
-	/*
-	 * Prevent enabling SNP after IOMMU_ENABLED state because this process
-	 * affect how IOMMU driver sets up data structures and configures
-	 * IOMMU hardware.
-	 */
-	if (init_state > IOMMU_ENABLED) {
-		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
-		return -EINVAL;
-	}
-
 	amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
 	if (!amd_iommu_snp_en)
 		return -EINVAL;

and now you only need to line up snp_rmptable_init() after IOMMU init
instead of having it be a fs_initcall which happens right after
pci_subsys_init() so that PCI is there but at the right time when iommu
init state is at IOMMU_ENABLED but no later because then it is too late.

And there you need to test amd_iommu_snp_en which is already exported
anyway.

Ok?
On 11/7/2023 2:27 PM, Borislav Petkov wrote:
> On Tue, Nov 07, 2023 at 08:19:31PM +0100, Borislav Petkov wrote:
>> Arch code does not call drivers - arch code sets up the arch and
>> provides facilities which the drivers use.
>
> IOW (just an example diff):
>
> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
> index 1c9924de607a..00cdbc844961 100644
> --- a/drivers/iommu/amd/init.c
> +++ b/drivers/iommu/amd/init.c
> @@ -3290,6 +3290,7 @@ static int __init state_next(void)
>  		break;
>  	case IOMMU_ENABLED:
>  		register_syscore_ops(&amd_iommu_syscore_ops);
> +		amd_iommu_snp_enable();
>  		ret = amd_iommu_init_pci();
>  		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
>  		break;
> @@ -3814,16 +3815,6 @@ int amd_iommu_snp_enable(void)
>  		return -EINVAL;
>  	}
>
> -	/*
> -	 * Prevent enabling SNP after IOMMU_ENABLED state because this process
> -	 * affect how IOMMU driver sets up data structures and configures
> -	 * IOMMU hardware.
> -	 */
> -	if (init_state > IOMMU_ENABLED) {
> -		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
> -		return -EINVAL;
> -	}
> -
>  	amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
>  	if (!amd_iommu_snp_en)
>  		return -EINVAL;
>
> and now you only need to line up snp_rmptable_init() after IOMMU init
> instead of having it be a fs_initcall which happens right after
> pci_subsys_init() so that PCI is there but at the right time when iommu
> init state is at IOMMU_ENABLED but no later because then it is too late.
>
> And there you need to test amd_iommu_snp_en which is already exported
> anyway.
>
> Ok?

No, this is not correct as this will always enable SNP support on the
IOMMU even when SNP is not supported and enabled on the platform, and
then we will do stuff like forcing IOMMU v1 pagetables which we really
don't want to do if SNP is not supported and enabled on the platform.

That's what snp_rmptable_init() calling amd_iommu_snp_enable() ensures:
that SNP on the IOMMU is *only* enabled when platform/arch support for
it is detected and enabled.

And isn't the IOMMU driver always going to be built-in and isn't it part
of the platform support (not arch code, but surely platform specific
code)? (IOMMU enablement is a requirement for SNP.)

Thanks,
Ashish
On Tue, Nov 07, 2023 at 03:21:29PM -0600, Kalra, Ashish wrote:
> No, this is not correct as this will always enable SNP support on
> IOMMU even when SNP support is not supported and enabled on the
> platform,

You see that we set or clear X86_FEATURE_SEV_SNP depending on support,
right?

Which means, you need to test that bit in amd_iommu_snp_enable() first.

> And isn't IOMMU driver always going to be built-in and isn't it part of the
> platform support (not arch code, but surely platform specific code)?
> (IOMMU enablement is requirement for SNP).

Read the note again about the fragile ordering in my previous mail.
Ontop. Only build-tested.

---
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 2fd52b65deac..3be2451e7bc8 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -10,6 +10,7 @@ extern int force_iommu, no_iommu;
 extern int iommu_detected;
 extern int iommu_merge;
 extern int panic_on_overflow;
+extern bool amd_iommu_snp_en;
 
 #ifdef CONFIG_SWIOTLB
 extern bool x86_swiotlb_enable;
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 8b9ed72489e4..9237c327ad6d 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -196,23 +196,15 @@ static __init int __snp_rmptable_init(void)
 
 static int __init snp_rmptable_init(void)
 {
-	int family, model;
-
-	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+	if (!amd_iommu_snp_en)
 		return 0;
 
-	family = boot_cpu_data.x86;
-	model = boot_cpu_data.x86_model;
-
 	/*
 	 * RMP table entry format is not architectural and it can vary by processor and
 	 * is defined by the per-processor PPR. Restrict SNP support on the known CPU
 	 * model and family for which the RMP table entry format is currently defined for.
 	 */
-	if (family != 0x19 || model > 0xaf)
-		goto nosnp;
-
-	if (amd_iommu_snp_enable())
+	if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xaf)
 		goto nosnp;
 
 	if (__snp_rmptable_init())
@@ -228,12 +220,10 @@ static int __init snp_rmptable_init(void)
 }
 
 /*
- * This must be called after the PCI subsystem. This is because amd_iommu_snp_enable()
- * is called to ensure the IOMMU supports the SEV-SNP feature, which can only be
- * called after subsys_initcall().
+ * This must be called after the IOMMU has been initialized.
  *
  * NOTE: IOMMU is enforced by SNP to ensure that hypervisor cannot program DMA
  * directly into guest private memory. In case of SNP, the IOMMU ensures that
  * the page(s) used for DMA are hypervisor owned.
  */
-fs_initcall(snp_rmptable_init);
+device_initcall(snp_rmptable_init);
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index e2857109e966..353d68b25fe2 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -148,6 +148,4 @@ struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
 
 extern u64 amd_iommu_efr;
 extern u64 amd_iommu_efr2;
-
-extern bool amd_iommu_snp_en;
 #endif
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 1c9924de607a..9e72cd8413bb 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3255,6 +3255,35 @@ static bool __init detect_ivrs(void)
 	return true;
 }
 
+#ifdef CONFIG_KVM_AMD_SEV
+static void iommu_snp_enable(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return;
+
+	/*
+	 * The SNP support requires that IOMMU must be enabled, and is
+	 * not configured in the passthrough mode.
+	 */
+	if (no_iommu || iommu_default_passthrough()) {
+		pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported");
+		return;
+	}
+
+	amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
+	if (!amd_iommu_snp_en)
+		return;
+
+	pr_info("SNP enabled\n");
+
+	/* Enforce IOMMU v1 pagetable when SNP is enabled. */
+	if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+		pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n");
+		amd_iommu_pgtable = AMD_IOMMU_V1;
+	}
+}
+#endif
+
 /****************************************************************************
  *
  * AMD IOMMU Initialization State Machine
@@ -3290,6 +3319,7 @@ static int __init state_next(void)
 		break;
 	case IOMMU_ENABLED:
 		register_syscore_ops(&amd_iommu_syscore_ops);
+		iommu_snp_enable();
 		ret = amd_iommu_init_pci();
 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
 		break;
@@ -3802,40 +3832,4 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64
 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
 }
 
-#ifdef CONFIG_KVM_AMD_SEV
-int amd_iommu_snp_enable(void)
-{
-	/*
-	 * The SNP support requires that IOMMU must be enabled, and is
-	 * not configured in the passthrough mode.
-	 */
-	if (no_iommu || iommu_default_passthrough()) {
-		pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported");
-		return -EINVAL;
-	}
-
-	/*
-	 * Prevent enabling SNP after IOMMU_ENABLED state because this process
-	 * affect how IOMMU driver sets up data structures and configures
-	 * IOMMU hardware.
-	 */
-	if (init_state > IOMMU_ENABLED) {
-		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
-		return -EINVAL;
-	}
-
-	amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
-	if (!amd_iommu_snp_en)
-		return -EINVAL;
-
-	pr_info("SNP enabled\n");
-
-	/* Enforce IOMMU v1 pagetable when SNP is enabled. */
-	if (amd_iommu_pgtable != AMD_IOMMU_V1) {
-		pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n");
-		amd_iommu_pgtable = AMD_IOMMU_V1;
-	}
-	return 0;
-}
-#endif
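For what it's worth, the initcall ordering that makes the
device_initcall() move work looks roughly like this (my reading of
include/linux/init.h and arch/x86/kernel/pci-dma.c, i.e. an assumption,
not something stated in the thread):

	/*
	 * fs_initcall      (level 5) - old placement: the AMD IOMMU state
	 *                              machine has not reached IOMMU_ENABLED
	 * rootfs_initcall            - pci_iommu_init() drives the remaining
	 *                              AMD IOMMU init states
	 * device_initcall  (level 6) - new placement: amd_iommu_snp_en now
	 *                              reflects the final IOMMU state
	 */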
On 11/7/2023 4:08 PM, Borislav Petkov wrote:
>  static int __init snp_rmptable_init(void)
>  {
> -	int family, model;
> -
> -	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +	if (!amd_iommu_snp_en)
>  		return 0;
>

We will still need some method to tell the IOMMU driver if SNP
support/feature is disabled by this function, for example, when the CPU
family and model is not supported by SNP and we jump to the nosnp label.

The reliable way for this to work is to ensure snp_rmptable_init() is
called before IOMMU initialization and then IOMMU initialization depends
on the SNP feature flag set up by snp_rmptable_init() to enable SNP
support on the IOMMU or not.

If snp_rmptable_init() is called after IOMMU initialization and it
detects an issue with SNP support it will clear the SNP feature, but the
IOMMU driver does not get notified about it. Therefore,
snp_rmptable_init() should get called before IOMMU initialization or as
part of IOMMU initialization, for example, amd_iommu_enable() calling
snp_rmptable_init() before calling iommu_snp_enable().

Thanks,
Ashish
On Tue, Nov 07, 2023 at 04:33:41PM -0600, Kalra, Ashish wrote:
> We will still need some method to tell the IOMMU driver if SNP
> support/feature is disabled by this function, for example, when CPU family
> and model is not supported by SNP and we jump to no_snp label.

See below.

> The reliable way for this to work is to ensure snp_rmptable_init() is called
> before IOMMU initialization and then IOMMU initialization depends on SNP
> feature flag setup by snp_rmptable_init() to enable SNP support on IOMMU or
> not.

Yes, this whole SNP initialization needs to be reworked and split this
way:

- early detection work which needs to be done once goes to
  bsp_init_amd(): that's basically your early_detect_mem_encrypt() stuff
  which needs to happen exactly only once and early.

- Any work like:

	c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;

  and the like which needs to happen on each AP, gets put in a function
  which gets called by init_amd().

By the time IOMMU gets to init, you already know whether it should
enable SNP and check X86_FEATURE_SEV_SNP.

Finally, you call __snp_rmptable_init() which does the *per-CPU* init
work which is still pending.

Ok?

Ontop of the previous ontop patch:

---
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 6cc2074fcea3..a9c95e5d6b06 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -674,8 +674,19 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 	if (!(msr & MSR_K7_HWCR_SMMLOCK))
 		goto clear_sev;
 
-	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
-		goto clear_snp;
+	if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
+		/*
+		 * RMP table entry format is not architectural and it can vary by processor
+		 * and is defined by the per-processor PPR. Restrict SNP support on the known
+		 * CPU model and family for which the RMP table entry format is currently
+		 * defined for.
+		 */
+		if (c->x86 != 0x19 || c->x86_model > 0xaf)
+			goto clear_snp;
+
+		if (!early_rmptable_check())
+			goto clear_snp;
+	}
 
 	return;
 
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 9237c327ad6d..5a71df9ae4cb 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -199,14 +199,6 @@ static int __init snp_rmptable_init(void)
 	if (!amd_iommu_snp_en)
 		return 0;
 
-	/*
-	 * RMP table entry format is not architectural and it can vary by processor and
-	 * is defined by the per-processor PPR. Restrict SNP support on the known CPU
-	 * model and family for which the RMP table entry format is currently defined for.
-	 */
-	if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xaf)
-		goto nosnp;
-
 	if (__snp_rmptable_init())
 		goto nosnp;
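The shape of the split being requested here, as a sketch (hypothetical
placement, not the posted patch):

	static void bsp_init_amd(struct cpuinfo_x86 *c)
	{
		/* ... existing once-only BSP setup ... */

		/* Runs exactly once, early enough for the AutoIBRS decision. */
		if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
			setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
	}

	static void init_amd(struct cpuinfo_x86 *c)
	{
		/* ... strictly per-CPU work only, e.g. the c->x86_phys_bits
		 * adjustment; no repeated RMP size calculations here ... */
	}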
On 07/11/2023 19:32, Tom Lendacky wrote:
> On 11/7/23 10:31, Borislav Petkov wrote:
>>
>> And the stuff that needs to happen once, needs to be called once too.
>>
>>> +
>>> +	return snp_get_rmptable_info(&rmp_base, &rmp_size);
>>> +}
>>> +
>>>  static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>>  {
>>>  	u64 msr;
>>> @@ -659,6 +674,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>>  	if (!(msr & MSR_K7_HWCR_SMMLOCK))
>>>  		goto clear_sev;
>>> +	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
>>> +		goto clear_snp;
>>> +
>>>  	return;
>>>  clear_all:
>>> @@ -666,6 +684,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>>  clear_sev:
>>>  	setup_clear_cpu_cap(X86_FEATURE_SEV);
>>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
>>> +clear_snp:
>>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
>>>  	}
>>>  }
>>
>> ...
>>
>>> +bool snp_get_rmptable_info(u64 *start, u64 *len)
>>> +{
>>> +	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
>>> +
>>> +	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
>>> +	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
>>> +
>>> +	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
>>> +		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
>>> +		return false;
>>> +	}
>>
>> If you're masking off bits 0-12 above...
>
> Because the RMP_END MSR, most specifically, has a default value of
> 0x1fff, where bits [12:0] are reserved. So to specifically check if the
> MSR has been set to a non-zero end value, the bits are masked off.
> However, ...

Do you have a source for this? Because the APM vol. 2, table A.7 says
the reset value of RMP_END is all zeros.

Thanks,
Jeremi
On 08/11/2023 07:14, Borislav Petkov wrote:
> On Tue, Nov 07, 2023 at 04:33:41PM -0600, Kalra, Ashish wrote:
>> We will still need some method to tell the IOMMU driver if SNP
>> support/feature is disabled by this function, for example, when CPU family
>> and model is not supported by SNP and we jump to no_snp label.
>
> See below.
>
>> The reliable way for this to work is to ensure snp_rmptable_init() is called
>> before IOMMU initialization and then IOMMU initialization depends on SNP
>> feature flag setup by snp_rmptable_init() to enable SNP support on IOMMU or
>> not.
>
> Yes, this whole SNP initialization needs to be reworked and split this
> way:

I agree with Borislav and have some comments of my own.

>
> - early detection work which needs to be done once goes to
>   bsp_init_amd(): that's basically your early_detect_mem_encrypt() stuff
>   which needs to happen exactly only once and early.
>
> - Any work like:
>
> 	c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
>
>   and the like which needs to happen on each AP, gets put in a function
>   which gets called by init_amd().
>
> By the time IOMMU gets to init, you already know whether it should
> enable SNP and check X86_FEATURE_SEV_SNP.

This flow would suit me better too. In SNP-host capable Hyper-V VMs
there is no IOMMU and I've had to resort to early return from
amd_iommu_snp_enable() to prevent it from disabling SNP [1]. In addition
to what Borislav posted, you'd just need to enforce that if IOMMU is
detected it actually gets enabled.

[1]: https://lore.kernel.org/lkml/20230213103402.1189285-6-jpiotrowski@linux.microsoft.com/

>
> Finally, you call __snp_rmptable_init() which does the *per-CPU* init
> work which is still pending.

Yes please, and the only rmp thing left to do per-CPU would be to check
that the MSRs are set the same as the value read from CPU0.

Running the early_rmptable_check() from early_detect_mem_encrypt() and
on every CPU makes it difficult to support a kernel allocated RMP table.
If you look at what I did for the mentioned Hyper-V SNP-host VMs [2]
(which I think is reasonable) the RMP table is allocated in
init_mem_mapping() and the addresses are propagated to other CPUs
through hv_cpu_init(), which is called from
cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, ...). So it would be great if
any init work plays nice with cpu hotplug notifiers.

[2]: https://lore.kernel.org/lkml/20230213103402.1189285-2-jpiotrowski@linux.microsoft.com/

Thanks,
Jeremi

>
> Ok?
>
> Ontop of the previous ontop patch:
>
> ---
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index 6cc2074fcea3..a9c95e5d6b06 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -674,8 +674,19 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>  	if (!(msr & MSR_K7_HWCR_SMMLOCK))
>  		goto clear_sev;
>
> -	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
> -		goto clear_snp;
> +	if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
> +		/*
> +		 * RMP table entry format is not architectural and it can vary by processor
> +		 * and is defined by the per-processor PPR. Restrict SNP support on the known
> +		 * CPU model and family for which the RMP table entry format is currently
> +		 * defined for.
> +		 */
> +		if (c->x86 != 0x19 || c->x86_model > 0xaf)
> +			goto clear_snp;
> +
> +		if (!early_rmptable_check())
> +			goto clear_snp;
> +	}
>
>  	return;
>
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index 9237c327ad6d..5a71df9ae4cb 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c
> @@ -199,14 +199,6 @@ static int __init snp_rmptable_init(void)
>  	if (!amd_iommu_snp_en)
>  		return 0;
>
> -	/*
> -	 * RMP table entry format is not architectural and it can vary by processor and
> -	 * is defined by the per-processor PPR. Restrict SNP support on the known CPU
> -	 * model and family for which the RMP table entry format is currently defined for.
> -	 */
> -	if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xaf)
> -		goto nosnp;
> -
>  	if (__snp_rmptable_init())
>  		goto nosnp;
>
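A sketch of the per-CPU MSR consistency check Jeremi describes, wired up
as a CPU hotplug callback so it also covers late hotplug (names and
state string are hypothetical):

	static u64 rmp_base_bsp, rmp_end_bsp;	/* cached once on the BSP */

	static int snp_check_rmp_msrs(unsigned int cpu)
	{
		u64 base, end;

		/* Startup callbacks run on the CPU coming online for CPUHP_AP_* states. */
		rdmsrl(MSR_AMD64_RMP_BASE, base);
		rdmsrl(MSR_AMD64_RMP_END, end);

		if (base != rmp_base_bsp || end != rmp_end_bsp) {
			pr_err("CPU%u: RMP MSRs differ from CPU0\n", cpu);
			return -EINVAL;
		}

		return 0;
	}

	/*
	 * cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/snp:rmp_check",
	 *		     snp_check_rmp_msrs, NULL);
	 */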
On 11/8/23 02:21, Jeremi Piotrowski wrote:
> On 07/11/2023 19:32, Tom Lendacky wrote:
>> On 11/7/23 10:31, Borislav Petkov wrote:
>>>
>>> And the stuff that needs to happen once, needs to be called once too.
>>>
>>>> +
>>>> +	return snp_get_rmptable_info(&rmp_base, &rmp_size);
>>>> +}
>>>> +
>>>>  static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>>>  {
>>>>  	u64 msr;
>>>> @@ -659,6 +674,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>>>  	if (!(msr & MSR_K7_HWCR_SMMLOCK))
>>>>  		goto clear_sev;
>>>> +	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
>>>> +		goto clear_snp;
>>>> +
>>>>  	return;
>>>>  clear_all:
>>>> @@ -666,6 +684,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>>>  clear_sev:
>>>>  	setup_clear_cpu_cap(X86_FEATURE_SEV);
>>>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
>>>> +clear_snp:
>>>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
>>>>  	}
>>>>  }
>>>
>>> ...
>>>
>>>> +bool snp_get_rmptable_info(u64 *start, u64 *len)
>>>> +{
>>>> +	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
>>>> +
>>>> +	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
>>>> +	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
>>>> +
>>>> +	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
>>>> +		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
>>>> +		return false;
>>>> +	}
>>>
>>> If you're masking off bits 0-12 above...
>>
>> Because the RMP_END MSR, most specifically, has a default value of
>> 0x1fff, where bits [12:0] are reserved. So to specifically check if the
>> MSR has been set to a non-zero end value, the bits are masked off.
>> However, ...
>
> Do you have a source for this? Because the APM vol. 2, table A.7 says
> the reset value of RMP_END is all zeros.

Ah, good catch. Let me work on getting the APM updated.

Thanks,
Tom

>
> Thanks,
> Jeremi
On 11/8/2023 12:14 AM, Borislav Petkov wrote:
> On Tue, Nov 07, 2023 at 04:33:41PM -0600, Kalra, Ashish wrote:
>> We will still need some method to tell the IOMMU driver if SNP
>> support/feature is disabled by this function, for example, when CPU family
>> and model is not supported by SNP and we jump to no_snp label.
>
> See below.
>
>> The reliable way for this to work is to ensure snp_rmptable_init() is called
>> before IOMMU initialization and then IOMMU initialization depends on SNP
>> feature flag setup by snp_rmptable_init() to enable SNP support on IOMMU or
>> not.
>
> Yes, this whole SNP initialization needs to be reworked and split this
> way:
>
> - early detection work which needs to be done once goes to
> bsp_init_amd(): that's basically your early_detect_mem_encrypt() stuff
> which needs to happen exactly only once and early.
>
> - Any work like:
>
>	c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
>
> and the like which needs to happen on each AP, gets put in a function
> which gets called by init_amd().
>
> By the time IOMMU gets to init, you already know whether it should
> enable SNP and check X86_FEATURE_SEV_SNP.
>
> Finally, you call __snp_rmptable_init() which does the *per-CPU* init
> work which is still pending.
>
> Ok?

Yes, we will need to rework the SNP initialization. The important point is that we want snp_rmptable_init() to run before IOMMU initialization: if something like the RMP table is not set up correctly, we don't want IOMMU initialization to enable SNP on the IOMMUs.

Thanks,
Ashish
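To make that dependency concrete, the IOMMU side would gate its SNP enablement on the feature flag that the earlier init may have cleared. A minimal sketch, assuming the driver's existing amd_iommu_snp_en flag (illustrative, not the actual driver code):

	int amd_iommu_snp_enable(void)
	{
		/*
		 * If the earlier SNP setup failed (unsupported family/model,
		 * bad RMP configuration, ...), the cap was already cleared
		 * and the IOMMU must not enable its SNP support.
		 */
		if (!boot_cpu_has(X86_FEATURE_SEV_SNP))
			return -EINVAL;

		amd_iommu_snp_en = true;
		return 0;
	}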
On 07/11/2023 20:00, Kalra, Ashish wrote:
> Hello Boris,
>
> Addressing some of the remaining comments:
>
> On 11/7/2023 10:31 AM, Borislav Petkov wrote:
>> On Mon, Oct 16, 2023 at 08:27:35AM -0500, Michael Roth wrote:
>>> +static bool early_rmptable_check(void)
>>> +{
>>> +	u64 rmp_base, rmp_size;
>>> +
>>> +	/*
>>> +	 * For early BSP initialization, max_pfn won't be set up yet; wait until
>>> +	 * it is set before performing the RMP table calculations.
>>> +	 */
>>> +	if (!max_pfn)
>>> +		return true;
>>
>> This already says that this is called at the wrong point during init.
>>
>> Right now we have
>>
>> early_identify_cpu -> early_init_amd -> early_detect_mem_encrypt
>>
>> which runs only on the BSP but then early_init_amd() is called in
>> init_amd() too so that it takes care of the APs too.
>>
>> Which ends up doing a lot of unnecessary work on each AP in
>> early_detect_mem_encrypt() like calculating the RMP size on each AP
>> unnecessarily where this needs to happen exactly once.
>>
>> Is there any reason why this function cannot be moved to init_amd()
>> where it'll do the normal, per-AP init?
>>
>> And the stuff that needs to happen once, needs to be called once too.
>>
>>> +
>>> +	return snp_get_rmptable_info(&rmp_base, &rmp_size);
>>> +}
>>> +
>>>  static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>>  {
>>>  	u64 msr;
>>> @@ -659,6 +674,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>>  	if (!(msr & MSR_K7_HWCR_SMMLOCK))
>>>  		goto clear_sev;
>>> +	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
>>> +		goto clear_snp;
>>> +
>>>  	return;
>>>  clear_all:
>>> @@ -666,6 +684,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
>>>  clear_sev:
>>>  	setup_clear_cpu_cap(X86_FEATURE_SEV);
>>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
>>> +clear_snp:
>>>  	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
>>>  }
>>>  }
>>
>> ...
>>
>>> +bool snp_get_rmptable_info(u64 *start, u64 *len)
>>> +{
>>> +	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
>>> +
>>> +	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
>>> +	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
>>> +
>>> +	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
>>> +		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
>>> +		return false;
>>> +	}
>>
>> If you're masking off bits 0-12 above...
>>
>>> +
>>> +	if (rmp_base > rmp_end) {
>>
>> ... why aren't you using the masked out vars further on?
>>
>> I know, the hw will say, yeah, those bits are 0 but still. IOW, do:
>>
>> rmp_base &= RMP_ADDR_MASK;
>> rmp_end &= RMP_ADDR_MASK;
>>
>> after reading them.
>>
>>> +		pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
>>> +		return false;
>>> +	}
>>> +
>>> +	rmp_sz = rmp_end - rmp_base + 1;
>>> +
>>> +	/*
>>> +	 * Calculate the amount of memory that must be reserved by the BIOS to
>>> +	 * address the whole RAM, including the bookkeeping area. The RMP itself
>>> +	 * must also be covered.
>>> +	 */
>>> +	max_rmp_pfn = max_pfn;
>>> +	if (PHYS_PFN(rmp_end) > max_pfn)
>>> +		max_rmp_pfn = PHYS_PFN(rmp_end);
>>> +
>>> +	calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
>>> +
>>> +	if (calc_rmp_sz > rmp_sz) {
>>> +		pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
>>> +		       calc_rmp_sz, rmp_sz);
>>> +		return false;
>>> +	}
>>> +
>>> +	*start = rmp_base;
>>> +	*len = rmp_sz;
>>> +
>>> +	return true;
>>> +}
>>> +
>>> +static __init int __snp_rmptable_init(void)
>>> +{
>>> +	u64 rmp_base, rmp_size;
>>> +	void *rmp_start;
>>> +	u64 val;
>>> +
>>> +	if (!snp_get_rmptable_info(&rmp_base, &rmp_size))
>>> +		return 1;
>>> +
>>> +	pr_info("RMP table physical address [0x%016llx - 0x%016llx]\n",
>>
>> That's "RMP table physical range"
>>
>>> +		rmp_base, rmp_base + rmp_size - 1);
>>> +
>>> +	rmp_start = memremap(rmp_base, rmp_size, MEMREMAP_WB);
>>> +	if (!rmp_start) {
>>> +		pr_err("Failed to map RMP table addr 0x%llx size 0x%llx\n", rmp_base, rmp_size);
>>
>> No need to dump rmp_base and rmp_size again here - you're dumping them
>> above.
>>
>>> +		return 1;
>>> +	}
>>> +
>>> +	/*
>>> +	 * Check if SEV-SNP is already enabled; this can happen in the case of
>>> +	 * a kexec boot.
>>> +	 */
>>> +	rdmsrl(MSR_AMD64_SYSCFG, val);
>>> +	if (val & MSR_AMD64_SYSCFG_SNP_EN)
>>> +		goto skip_enable;
>>> +
>>> +	/* Initialize the RMP table to zero */
>>
>> Again: useless comment.
>>
>>> +	memset(rmp_start, 0, rmp_size);
>>> +
>>> +	/* Flush the caches to ensure that data is written before SNP is enabled. */
>>> +	wbinvd_on_all_cpus();
>>> +
>>> +	/* MFDM must be enabled on all the CPUs prior to enabling SNP. */
>>
>> First of all, use the APM bit name here pls: MtrrFixDramModEn.
>>
>> And then, for the life of me, I can't find any mention in the APM why
>> this bit is needed. Neither in "15.36.2 Enabling SEV-SNP" nor in
>> "15.34.3 Enabling SEV".
>>
>> Looking at the bit definitions of WrMem and RdMem - read and write
>> requests get directed to system memory instead of MMIO so I guess you
>> don't want to be able to write MMIO for certain physical ranges when SNP
>> is enabled but it'll be good to have this properly explained instead of
>> a "this must happen" information-less sentence.
>
> This is a prerequisite for SNP_INIT as per the SNP Firmware ABI specifications, section 8.8.2:
>
> From the SNP FW ABI specs:
>
> If INIT_RMP is 1, then the firmware ensures the following system requirements are met:
> • SYSCFG[MemoryEncryptionModEn] must be set to 1 across all cores. (SEV must be enabled.)
> • SYSCFG[SecureNestedPagingEn] must be set to 1 across all cores.
> • SYSCFG[VMPLEn] must be set to 1 across all cores.
> • SYSCFG[MFDM] must be set to 1 across all cores.

Hi Ashish,

I just noticed that the kernel shouts at me about this bit when I offline->online a CPU in an SNP host:

[2692586.589194] smpboot: CPU 63 is now offline
[2692589.366822] [Firmware Warn]: MTRR: CPU 0: SYSCFG[MtrrFixDramModEn] not cleared by BIOS, clearing this bit
[2692589.376582] smpboot: Booting Node 0 Processor 63 APIC 0x3f
[2692589.378070] [Firmware Warn]: MTRR: CPU 63: SYSCFG[MtrrFixDramModEn] not cleared by BIOS, clearing this bit
[2692589.388845] microcode: CPU63: new patch_level=0x0a0011d1

Now I understand if you say "CPU offlining is not supported" but there's nothing currently blocking it.

Best wishes,
Jeremi

> • VM_HSAVE_PA (MSR C001_0117) must be set to 0h across all cores.
> • HWCR[SmmLock] (MSR C001_0015) must be set to 1 across all cores.
>
> So, this platform-enabling code for SNP needs to ensure that these conditions are met before SNP_INIT is called (a condensed sketch of these requirements follows below).
>
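Condensed into code, the requirements above map onto two separate on_each_cpu() passes: MFDM must be set on all cores before SecureNestedPagingEn is set on any of them, which is why a single combined callback would not do. A minimal sketch reusing the posted patch's mfd_enable()/snp_enable() helpers; snp_program_syscfg() and the VM_HSAVE_PA check are illustrative assumptions, not posted code:

	/* Illustrative check of another SNP_INIT requirement: VM_HSAVE_PA == 0h. */
	static void hsave_check(void *err)
	{
		u64 hsave;

		rdmsrl(MSR_VM_HSAVE_PA, hsave);
		if (hsave)
			WRITE_ONCE(*(int *)err, -EBUSY);
	}

	static int snp_program_syscfg(void)
	{
		int err = 0;

		on_each_cpu(hsave_check, &err, 1);
		if (err)
			return err;

		/* SYSCFG[MFDM] on every core first... */
		on_each_cpu(mfd_enable, NULL, 1);

		/* ...only then SecureNestedPagingEn + VMPLEn everywhere. */
		on_each_cpu(snp_enable, NULL, 1);

		return 0;
	}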
Hello Jeremi,

> Hi Ashish,
>
> I just noticed that the kernel shouts at me about this bit when I offline->online a CPU in
> an SNP host:

Yes, I also observe the same warning when I bring a CPU back online.

> [2692586.589194] smpboot: CPU 63 is now offline
> [2692589.366822] [Firmware Warn]: MTRR: CPU 0: SYSCFG[MtrrFixDramModEn] not cleared by BIOS, clearing this bit
> [2692589.376582] smpboot: Booting Node 0 Processor 63 APIC 0x3f
> [2692589.378070] [Firmware Warn]: MTRR: CPU 63: SYSCFG[MtrrFixDramModEn] not cleared by BIOS, clearing this bit
> [2692589.388845] microcode: CPU63: new patch_level=0x0a0011d1
>
> Now I understand if you say "CPU offlining is not supported" but there's nothing currently
> blocking it.

There is CPU hotplug support on the SNP platform to do __snp_enable() when bringing a CPU back online, but I am not yet sure what needs to be done for the MtrrFixDramModEn bit in SYSCFG; I am discussing this with the FW developers.

Thanks,
Ashish
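One possible direction while that discussion is pending, sketched purely as an illustration (snp_cpu_online() is a hypothetical name, and whether re-setting MFDM after the MTRR code has cleared it is even acceptable is exactly the open firmware question): have the CPU-online callback restore every SYSCFG bit SNP_INIT expects, not just the SNP enable bits.

	static int snp_cpu_online(unsigned int cpu)
	{
		u64 val;

		if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
			return 0;

		rdmsrl(MSR_AMD64_SYSCFG, val);
		/* Restore MFDM along with the SNP bits on the re-onlined CPU. */
		val |= MSR_AMD64_SYSCFG_MFDM;
		val |= MSR_AMD64_SYSCFG_SNP_EN | MSR_AMD64_SYSCFG_SNP_VMPL_EN;
		wrmsrl(MSR_AMD64_SYSCFG, val);

		return 0;
	}

	/* Registered the same way the posted patch registers __snp_enable(): */
	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/snp:online", snp_cpu_online, NULL);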
On Tue, Nov 07, 2023 at 05:31:42PM +0100, Borislav Petkov wrote:
> On Mon, Oct 16, 2023 at 08:27:35AM -0500, Michael Roth wrote:
> > +static bool early_rmptable_check(void)
> > +{
> > +	u64 rmp_base, rmp_size;
> > +
> > +	/*
> > +	 * For early BSP initialization, max_pfn won't be set up yet; wait until
> > +	 * it is set before performing the RMP table calculations.
> > +	 */
> > +	if (!max_pfn)
> > +		return true;
>
> This already says that this is called at the wrong point during init.
>
> Right now we have
>
> early_identify_cpu -> early_init_amd -> early_detect_mem_encrypt
>
> which runs only on the BSP but then early_init_amd() is called in
> init_amd() too so that it takes care of the APs too.
>
> Which ends up doing a lot of unnecessary work on each AP in
> early_detect_mem_encrypt() like calculating the RMP size on each AP
> unnecessarily where this needs to happen exactly once.
>
> Is there any reason why this function cannot be moved to init_amd()
> where it'll do the normal, per-AP init?
>
> And the stuff that needs to happen once, needs to be called once too.

I've renamed/repurposed snp_get_rmptable_info() to snp_probe_rmptable_info(). It now reads the MSRs, sanity-checks them, and stores the values into __ro_after_init variables on success. Subsequent code uses those values to initialize the RMP table mapping instead of re-reading the MSRs.

I've moved the call-site for snp_probe_rmptable_info() to bsp_init_amd(), which gets called right after early_init_amd(), so it should still be early enough to clear X86_FEATURE_SEV_SNP such that AutoIBRS doesn't get disabled if SNP isn't available on the system. APs don't call bsp_init_amd(), so that should avoid doing multiple MSR reads.

And I think Ashish has all the other review comments addressed now.

Thanks,
Mike
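A condensed sketch of the rework described above; snp_probe_rmptable_info() and the __ro_after_init storage are per Mike's description, while the variable names and exact checks here are assumptions rather than the actual next revision:

	static u64 probed_rmp_base __ro_after_init;
	static u64 probed_rmp_size __ro_after_init;

	/* Called once from bsp_init_amd(): read and sanity-check the RMP MSRs. */
	bool snp_probe_rmptable_info(void)
	{
		u64 rmp_base, rmp_end;

		rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
		rdmsrl(MSR_AMD64_RMP_END, rmp_end);

		if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK))
			return false;	/* not reserved by BIOS */

		if (rmp_base > rmp_end)
			return false;

		probed_rmp_base = rmp_base;
		probed_rmp_size = rmp_end - rmp_base + 1;

		return true;
	}

Later setup then maps the table from probed_rmp_base/probed_rmp_size instead of re-reading the MSRs on every CPU, and clearing X86_FEATURE_SEV_SNP from bsp_init_amd() on failure happens early enough that AutoIBRS is not needlessly disabled.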
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 5a83da703e87..6a1f36df6a18 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -28,5 +28,7 @@ obj-y += net/
 
 obj-$(CONFIG_KEXEC_FILE) += purgatory/
 
+obj-y += virt/svm/
+
 # for cleaning
 subdir- += boot tools
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 702d93fdd10e..83efd407033b 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -117,6 +117,12 @@
 #define DISABLE_IBT	(1 << (X86_FEATURE_IBT & 31))
 #endif
 
+#ifdef CONFIG_KVM_AMD_SEV
+# define DISABLE_SEV_SNP	0
+#else
+# define DISABLE_SEV_SNP	(1 << (X86_FEATURE_SEV_SNP & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -141,7 +147,7 @@
 			 DISABLE_ENQCMD)
 #define DISABLED_MASK17	0
 #define DISABLED_MASK18	(DISABLE_IBT)
-#define DISABLED_MASK19	0
+#define DISABLED_MASK19	(DISABLE_SEV_SNP)
 #define DISABLED_MASK20	0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1d111350197f..2be74afb4cbd 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -589,6 +589,8 @@
 #define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
 #define MSR_AMD64_SEV_ES_ENABLED	BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
 #define MSR_AMD64_SEV_SNP_ENABLED	BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
+#define MSR_AMD64_RMP_BASE		0xc0010132
+#define MSR_AMD64_RMP_END		0xc0010133
 
 /* SNP feature bits enabled by the hypervisor */
 #define MSR_AMD64_SNP_VTOM		BIT_ULL(3)
@@ -690,7 +692,14 @@
 #define MSR_K8_TOP_MEM2			0xc001001d
 #define MSR_AMD64_SYSCFG		0xc0010010
 #define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
-#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT	24
+#define MSR_AMD64_SYSCFG_SNP_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN	BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
+#define MSR_AMD64_SYSCFG_MFDM_BIT	19
+#define MSR_AMD64_SYSCFG_MFDM		BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
+
 #define MSR_K8_INT_PENDING_MSG	0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 5b4a1ce3d368..b05fcd0ab7e4 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -243,4 +243,10 @@ static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
 static inline u64 sev_get_status(void) { return 0; }
 #endif
 
+#ifdef CONFIG_KVM_AMD_SEV
+bool snp_get_rmptable_info(u64 *start, u64 *len);
+#else
+static inline bool snp_get_rmptable_info(u64 *start, u64 *len) { return false; }
+#endif
+
 #endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 14ee7f750cc7..6cc2074fcea3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -20,6 +20,7 @@
 #include <asm/delay.h>
 #include <asm/debugreg.h>
 #include <asm/resctrl.h>
+#include <asm/sev.h>
 
 #ifdef CONFIG_X86_64
 # include <asm/mmconfig.h>
@@ -618,6 +619,20 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 		resctrl_cpu_detect(c);
 }
 
+static bool early_rmptable_check(void)
+{
+	u64 rmp_base, rmp_size;
+
+	/*
+	 * For early BSP initialization, max_pfn won't be set up yet; wait until
+	 * it is set before performing the RMP table calculations.
+	 */
+	if (!max_pfn)
+		return true;
+
+	return snp_get_rmptable_info(&rmp_base, &rmp_size);
+}
+
 static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 {
 	u64 msr;
@@ -659,6 +674,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 	if (!(msr & MSR_K7_HWCR_SMMLOCK))
 		goto clear_sev;
 
+	if (cpu_has(c, X86_FEATURE_SEV_SNP) && !early_rmptable_check())
+		goto clear_snp;
+
 	return;
 
 clear_all:
@@ -666,6 +684,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 clear_sev:
 	setup_clear_cpu_cap(X86_FEATURE_SEV);
 	setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
+clear_snp:
 	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
 	}
 }
diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile
new file mode 100644
index 000000000000..ef2a31bdcc70
--- /dev/null
+++ b/arch/x86/virt/svm/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_KVM_AMD_SEV) += sev.o
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
new file mode 100644
index 000000000000..8b9ed72489e4
--- /dev/null
+++ b/arch/x86/virt/svm/sev.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD SVM-SEV Host Support.
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ *
+ */
+
+#include <linux/cc_platform.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/set_memory.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/cpumask.h>
+#include <linux/iommu.h>
+#include <linux/amd-iommu.h>
+
+#include <asm/sev.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid.h>
+#include <asm/cmdline.h>
+#include <asm/iommu.h>
+
+/*
+ * The RMP entry format is not architectural. The format is defined in the PPR
+ * for Family 19h Model 01h, Rev B1 processors.
+ */
+struct rmpentry {
+	u64	assigned	: 1,
+		pagesize	: 1,
+		immutable	: 1,
+		rsvd1		: 9,
+		gpa		: 39,
+		asid		: 10,
+		vmsa		: 1,
+		validated	: 1,
+		rsvd2		: 1;
+	u64 rsvd3;
+} __packed;
+
+/*
+ * The first 16KB from the RMP_BASE is used by the processor for
+ * bookkeeping; the range needs to be added during the RMP entry lookup.
+ */
+#define RMPTABLE_CPU_BOOKKEEPING_SZ	0x4000
+
+static struct rmpentry *rmptable_start __ro_after_init;
+static u64 rmptable_max_pfn __ro_after_init;
+
+#undef pr_fmt
+#define pr_fmt(fmt)	"SEV-SNP: " fmt
+
+static int __mfd_enable(unsigned int cpu)
+{
+	u64 val;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return 0;
+
+	rdmsrl(MSR_AMD64_SYSCFG, val);
+
+	val |= MSR_AMD64_SYSCFG_MFDM;
+
+	wrmsrl(MSR_AMD64_SYSCFG, val);
+
+	return 0;
+}
+
+static __init void mfd_enable(void *arg)
+{
+	__mfd_enable(smp_processor_id());
+}
+
+static int __snp_enable(unsigned int cpu)
+{
+	u64 val;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return 0;
+
+	rdmsrl(MSR_AMD64_SYSCFG, val);
+
+	val |= MSR_AMD64_SYSCFG_SNP_EN;
+	val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
+
+	wrmsrl(MSR_AMD64_SYSCFG, val);
+
+	return 0;
+}
+
+static __init void snp_enable(void *arg)
+{
+	__snp_enable(smp_processor_id());
+}
+
+#define RMP_ADDR_MASK	GENMASK_ULL(51, 13)
+
+bool snp_get_rmptable_info(u64 *start, u64 *len)
+{
+	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
+
+	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
+	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+
+	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
+		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+		return false;
+	}
+
+	if (rmp_base > rmp_end) {
+		pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
+		return false;
+	}
+
+	rmp_sz = rmp_end - rmp_base + 1;
+
+	/*
+	 * Calculate the amount of memory that must be reserved by the BIOS to
+	 * address the whole RAM, including the bookkeeping area. The RMP itself
+	 * must also be covered.
+	 */
+	max_rmp_pfn = max_pfn;
+	if (PHYS_PFN(rmp_end) > max_pfn)
+		max_rmp_pfn = PHYS_PFN(rmp_end);
+
+	calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+	if (calc_rmp_sz > rmp_sz) {
+		pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
+		       calc_rmp_sz, rmp_sz);
+		return false;
+	}
+
+	*start = rmp_base;
+	*len = rmp_sz;
+
+	return true;
+}
+
+static __init int __snp_rmptable_init(void)
+{
+	u64 rmp_base, rmp_size;
+	void *rmp_start;
+	u64 val;
+
+	if (!snp_get_rmptable_info(&rmp_base, &rmp_size))
+		return 1;
+
+	pr_info("RMP table physical address [0x%016llx - 0x%016llx]\n",
+		rmp_base, rmp_base + rmp_size - 1);
+
+	rmp_start = memremap(rmp_base, rmp_size, MEMREMAP_WB);
+	if (!rmp_start) {
+		pr_err("Failed to map RMP table addr 0x%llx size 0x%llx\n", rmp_base, rmp_size);
+		return 1;
+	}
+
+	/*
+	 * Check if SEV-SNP is already enabled; this can happen in the case of
+	 * a kexec boot.
+	 */
+	rdmsrl(MSR_AMD64_SYSCFG, val);
+	if (val & MSR_AMD64_SYSCFG_SNP_EN)
+		goto skip_enable;
+
+	/* Initialize the RMP table to zero */
+	memset(rmp_start, 0, rmp_size);
+
+	/* Flush the caches to ensure that data is written before SNP is enabled. */
+	wbinvd_on_all_cpus();
+
+	/* MFDM must be enabled on all the CPUs prior to enabling SNP. */
+	on_each_cpu(mfd_enable, NULL, 1);
+
+	/* Enable SNP on all CPUs. */
+	on_each_cpu(snp_enable, NULL, 1);
+
+skip_enable:
+	rmp_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
+	rmp_size -= RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+	rmptable_start = (struct rmpentry *)rmp_start;
+	rmptable_max_pfn = rmp_size / sizeof(struct rmpentry) - 1;
+
+	return 0;
+}
+
+static int __init snp_rmptable_init(void)
+{
+	int family, model;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return 0;
+
+	family = boot_cpu_data.x86;
+	model  = boot_cpu_data.x86_model;
+
+	/*
+	 * RMP table entry format is not architectural and it can vary by processor and
+	 * is defined by the per-processor PPR. Restrict SNP support to the known CPU
+	 * models and families for which the RMP table entry format is currently defined.
+	 */
+	if (family != 0x19 || model > 0xaf)
+		goto nosnp;
+
+	if (amd_iommu_snp_enable())
+		goto nosnp;
+
+	if (__snp_rmptable_init())
+		goto nosnp;
+
+	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
+
+	return 0;
+
+nosnp:
+	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+	return -ENOSYS;
+}
+
+/*
+ * This must be called after the PCI subsystem because amd_iommu_snp_enable()
+ * is used to verify that the IOMMU supports the SEV-SNP feature, and it can
+ * only be called after subsys_initcall().
+ *
+ * NOTE: IOMMU is enforced by SNP to ensure that the hypervisor cannot program
+ * DMA directly into guest private memory. In the case of SNP, the IOMMU
+ * ensures that the page(s) used for DMA are hypervisor-owned.
+ */
+fs_initcall(snp_rmptable_init);
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 45efb7e5d725..1c9924de607a 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3802,7 +3802,7 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64
 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
 }
 
-#ifdef CONFIG_AMD_MEM_ENCRYPT
+#ifdef CONFIG_KVM_AMD_SEV
 int amd_iommu_snp_enable(void)
 {
 	/*
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 99a5201d9e62..55fc03cb3968 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -205,7 +205,7 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn,
 			 u64 *value);
 struct amd_iommu *get_amd_iommu(unsigned int idx);
 
-#ifdef CONFIG_AMD_MEM_ENCRYPT
+#ifdef CONFIG_KVM_AMD_SEV
 int amd_iommu_snp_enable(void);
 #endif
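A note on the layout this patch establishes: once the 16KB bookkeeping area is skipped, the table is a flat array of 16-byte struct rmpentry, one per 4K page of DRAM, which is where the (max_rmp_pfn << 4) sizing above comes from (covering 1 TiB of RAM takes roughly 4 GiB of RMP plus the bookkeeping area). A hypothetical lookup helper to illustrate the indexing; the real accessor arrives in later patches of the series and may differ:

	static struct rmpentry *rmp_entry_for_pfn(u64 pfn)
	{
		/* rmptable_start already points past the bookkeeping area. */
		if (!rmptable_start || pfn > rmptable_max_pfn)
			return NULL;

		return &rmptable_start[pfn];
	}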