@@ -209,6 +209,7 @@
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
+#define MSR_IA32_PKRS 0x000006e1 /* PKS rights MSR: access-/write-disable bits per supervisor pkey */
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
@@ -50,6 +50,7 @@
#define X86_CR4_SMEP 0x00100000
#define X86_CR4_SMAP 0x00200000
#define X86_CR4_PKE 0x00400000
+#define X86_CR4_PKS 0x01000000 /* enables protection-key checks on supervisor pages */
#define X86_EFLAGS_CF 0x00000001
#define X86_EFLAGS_FIXED 0x00000002
@@ -157,6 +158,7 @@ static inline u8 cpuid_maxphyaddr(void)
#define X86_FEATURE_RDPID (CPUID(0x7, 0, ECX, 22))
#define X86_FEATURE_SPEC_CTRL (CPUID(0x7, 0, EDX, 26))
#define X86_FEATURE_ARCH_CAPABILITIES (CPUID(0x7, 0, EDX, 29))
+#define X86_FEATURE_PKS (CPUID(0x7, 0, ECX, 31)) /* Protection Keys for Supervisor pages */
#define X86_FEATURE_NX (CPUID(0x80000001, 0, EDX, 20))
#define X86_FEATURE_RDPRU (CPUID(0x80000008, 0, EBX, 4))
@@ -20,6 +20,7 @@ tests += $(TEST_DIR)/tscdeadline_latency.flat
tests += $(TEST_DIR)/intel-iommu.flat
tests += $(TEST_DIR)/vmware_backdoors.flat
tests += $(TEST_DIR)/rdpru.flat
+tests += $(TEST_DIR)/pks.flat
include $(SRCDIR)/$(TEST_DIR)/Makefile.common
new file mode 100644
@@ -0,0 +1,146 @@
+#include "libcflat.h"
+#include "x86/desc.h"
+#include "x86/processor.h"
+#include "x86/vm.h"
+#include "x86/msr.h"
+
+/* CR0 write-protect bit, toggled by set_cr0_wp(). */
+#define CR0_WP_MASK	(1UL << 16)
+/* Lowest bit of the protection-key field in a page-table entry. */
+#define PTE_PKEY_BIT	59
+/*
+ * Size of the low window that main() aliases as supervisor pages: the PTEs
+ * for virtual [SUPER_BASE, 2 * SUPER_BASE) get this address bit cleared so
+ * they point back at [0, SUPER_BASE).  Unsigned long so that ~SUPER_BASE is
+ * a full-width PTE mask without relying on sign extension.
+ */
+#define SUPER_BASE	(1UL << 24)
+/* Access variable @v through its supervisor alias, SUPER_BASE bytes above it. */
+#define SUPER_VAR(v)	(*((__typeof__(&(v))) (((unsigned long)&(v)) + SUPER_BASE)))
+
+/* Number of #PF exceptions taken since the last init_test(). */
+volatile int pf_count = 0;
+/* Value of 'test' as seen by the #PF handler at fault time. */
+volatile unsigned save;
+/* Target of the probed accesses, reached directly or through SUPER_VAR(). */
+volatile unsigned test;
+
+/* Force CR0.WP to match @wp (non-zero sets it), preserving all other CR0 bits. */
+static void set_cr0_wp(int wp)
+{
+	unsigned long old_cr0 = read_cr0();
+	unsigned long wp_bit = wp ? CR0_WP_MASK : 0;
+
+	write_cr0((old_cr0 & ~CR0_WP_MASK) | wp_bit);
+}
+
+/*
+ * C half of the #PF handler; called from the pf_tss asm stub with the
+ * hardware error code as its only argument.
+ */
+void do_pf_tss(unsigned long error_code);
+void do_pf_tss(unsigned long error_code)
+{
+	printf("#PF handler, error code: 0x%lx\n", error_code);
+
+	/* Count the fault and snapshot 'test' before the faulting access is retried. */
+	pf_count += 1;
+	save = test;
+
+	/* Drop every PKRS restriction so the retried access can complete. */
+	wrmsr(MSR_IA32_PKRS, 0);
+}
+
+extern void pf_tss(void); // #PF entry stub defined in the asm below; main() installs it on vector 14
+
+asm ("pf_tss: \n\t"
+#ifdef __x86_64__
+ // no task on x86_64, save/restore caller-save regs
+ "push %rax; push %rcx; push %rdx; push %rsi; push %rdi\n"
+ "push %r8; push %r9; push %r10; push %r11\n"
+ "mov 9*8(%rsp), %rdi\n" // error code sits just above the 9 saved regs; pass it as do_pf_tss()'s argument
+#endif
+ "call do_pf_tss \n\t"
+#ifdef __x86_64__
+ "pop %r11; pop %r10; pop %r9; pop %r8\n" // restore in reverse push order
+ "pop %rdi; pop %rsi; pop %rdx; pop %rcx; pop %rax\n"
+#endif
+ "add $"S", %"R "sp\n\t" // discard error code
+ "iret"W" \n\t" // return to the interrupted context, which retries the faulting access
+ "jmp pf_tss\n\t" // NOTE(review): only reached if execution resumes after the iret - presumably for task-gate style handlers; confirm
+ );
+
+/*
+ * Return to a known baseline before each sub-test: zero the fault counter,
+ * flush the TLB entries for both mappings of 'test', clear all PKRS
+ * restrictions and turn CR0.WP off.
+ */
+static void init_test(void)
+{
+	volatile unsigned *user_alias = &test;
+	volatile unsigned *super_alias = &SUPER_VAR(test);
+
+	pf_count = 0;
+
+	invlpg(user_alias);
+	invlpg(super_alias);
+	wrmsr(MSR_IA32_PKRS, 0);
+	set_cr0_wp(0);
+}
+
+/*
+ * Exercise Protection Keys for Supervisor pages (PKS).
+ *
+ * The low 16MB keep their user mapping and are tagged with protection key 2;
+ * the same physical memory is aliased at 16MB-32MB as supervisor pages
+ * carrying the same key.  Each sub-test programs IA32_PKRS and CR0.WP,
+ * touches 'test' through one of the two mappings and checks how many page
+ * faults were taken and which value landed in memory.
+ *
+ * Returns the report_summary() exit status.
+ */
+int main(int ac, char **av)
+{
+	unsigned long i;
+	unsigned int pkey = 0x2;
+	/* PKRS holds two bits per key: bit 2*pkey is access-disable, bit 2*pkey+1 write-disable. */
+	unsigned int pkrs_ad = 1u << (2 * pkey);
+	unsigned int pkrs_wd = 1u << (2 * pkey + 1);
+	void *root;
+
+	if (!this_cpu_has(X86_FEATURE_PKS)) {
+		printf("PKS not enabled\n");
+		return report_summary();
+	}
+
+	setup_vm();
+	setup_alt_stack();
+	set_intr_alt_stack(14, pf_tss);
+	wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA);
+
+	/* CR3 is not reloaded while the PTEs are edited, so look the root up once. */
+	root = phys_to_virt(read_cr3());
+
+	// First 16MB are user pages
+	for (i = 0; i < SUPER_BASE; i += PAGE_SIZE) {
+		*get_pte(root, phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT);
+		invlpg((void *)i);
+	}
+
+	// Present the same 16MB as supervisor pages in the 16MB-32MB range
+	for (i = SUPER_BASE; i < 2 * SUPER_BASE; i += PAGE_SIZE) {
+		/* Clear the SUPER_BASE address bit so the entry points back at 0-16MB. */
+		*get_pte(root, phys_to_virt(i)) &= ~(unsigned long)SUPER_BASE;
+		*get_pte(root, phys_to_virt(i)) &= ~PT_USER_MASK;
+		*get_pte(root, phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT);
+		invlpg((void *)i);
+	}
+
+	write_cr4(read_cr4() | X86_CR4_PKS);
+	write_cr3(read_cr3());
+
+	/*
+	 * In the faulting cases below the handler records the old value of
+	 * 'test' in 'save' and clears PKRS, so the retried write still lands.
+	 */
+	init_test();
+	set_cr0_wp(1);
+	wrmsr(MSR_IA32_PKRS, pkrs_ad);
+	SUPER_VAR(test) = 21;
+	report(pf_count == 1 && test == 21 && save == 0,
+	       "write to supervisor page when pkrs is ad and wp == 1");
+
+	init_test();
+	set_cr0_wp(0);
+	wrmsr(MSR_IA32_PKRS, pkrs_ad);
+	SUPER_VAR(test) = 22;
+	report(pf_count == 1 && test == 22 && save == 21,
+	       "write to supervisor page when pkrs is ad and wp == 0");
+
+	init_test();
+	set_cr0_wp(1);
+	wrmsr(MSR_IA32_PKRS, pkrs_wd);
+	SUPER_VAR(test) = 23;
+	report(pf_count == 1 && test == 23 && save == 22,
+	       "write to supervisor page when pkrs is wd and wp == 1");
+
+	init_test();
+	set_cr0_wp(0);
+	wrmsr(MSR_IA32_PKRS, pkrs_wd);
+	SUPER_VAR(test) = 24;
+	report(pf_count == 0 && test == 24,
+	       "write to supervisor page when pkrs is wd and wp == 0");
+
+	/* Accesses through the user mapping must not be affected by PKRS. */
+	init_test();
+	set_cr0_wp(0);
+	wrmsr(MSR_IA32_PKRS, pkrs_wd);
+	test = 25;
+	report(pf_count == 0 && test == 25,
+	       "write to user page when pkrs is wd and wp == 0");
+
+	init_test();
+	set_cr0_wp(1);
+	wrmsr(MSR_IA32_PKRS, pkrs_wd);
+	test = 26;
+	report(pf_count == 0 && test == 26,
+	       "write to user page when pkrs is wd and wp == 1");
+
+	init_test();
+	wrmsr(MSR_IA32_PKRS, pkrs_ad);
+	/*
+	 * 'test' is volatile, so evaluating it in a void expression still
+	 * issues a real load through the user mapping.  A bare pointer cast
+	 * would compute an address without ever touching memory.
+	 */
+	(void)test;
+	report(pf_count == 0, "read from user page when pkrs is ad");
+
+	return report_summary();
+}
@@ -127,6 +127,11 @@ file = pku.flat
arch = x86_64
extra_params = -cpu host
+[pks]
+file = pks.flat
+arch = x86_64
+extra_params = -cpu host
+
[asyncpf]
file = asyncpf.flat
extra_params = -m 2048
This unit test exercises the KVM support for Protection Keys for
Supervisor Pages (PKS). If CR4.PKS is set in long mode, supervisor pkeys
are checked in addition to the normal paging protections, and access or
writes can be disabled through an MSR update, without TLB flushes, when
permissions change.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
---
 lib/x86/msr.h       |   1 +
 lib/x86/processor.h |   2 +
 x86/Makefile.x86_64 |   1 +
 x86/pks.c           | 146 ++++++++++++++++++++++++++++++++++++++++++++
 x86/unittests.cfg   |   5 ++
 5 files changed, 155 insertions(+)
 create mode 100644 x86/pks.c