Message ID | 20220321234844.1543161-12-bgardon@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: x86: Add a cap to disable NX hugepages on a VM | expand |
On Mon, Mar 21, 2022 at 04:48:44PM -0700, Ben Gardon wrote: > Ensure that the userspace actor attempting to disable NX hugepages has > permission to reboot the system. Since disabling NX hugepages would > allow a guest to crash the system, it is similar to reboot permissions. > > This approach is the simplest permission gating, but passing a file > descriptor opened for write for the module parameter would also work > well and be more precise. > The latter approach was suggested by Sean Christopherson. > > Suggested-by: Jim Mattson <jmattson@google.com> > Signed-off-by: Ben Gardon <bgardon@google.com> > --- > arch/x86/kvm/x86.c | 18 ++++++- > .../selftests/kvm/include/kvm_util_base.h | 2 + > tools/testing/selftests/kvm/lib/kvm_util.c | 7 +++ > .../selftests/kvm/x86_64/nx_huge_pages_test.c | 49 ++++++++++++++----- > .../kvm/x86_64/nx_huge_pages_test.sh | 2 +- > 5 files changed, 65 insertions(+), 13 deletions(-) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 74351cbb9b5b..995f30667619 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -4256,7 +4256,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_SYS_ATTRIBUTES: > case KVM_CAP_VAPIC: > case KVM_CAP_ENABLE_CAP: > - case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: > r = 1; > break; > case KVM_CAP_EXIT_HYPERCALL: > @@ -4359,6 +4358,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_DISABLE_QUIRKS2: > r = KVM_X86_VALID_QUIRKS; > break; > + case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: > + /* > + * Since the risk of disabling NX hugepages is a guest crashing > + * the system, ensure the userspace process has permission to > + * reboot the system. > + */ > + r = capable(CAP_SYS_BOOT); Duplicating this check and comment isn't ideal. I think it would be fine to unconditionally return true here (KVM, after all, does support the capability) and only check for CAP_SYS_BOOT when userspace attempts to enable the capability. 
> + break; > default: > break; > } > @@ -6050,6 +6057,15 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > mutex_unlock(&kvm->lock); > break; > case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: > + /* > + * Since the risk of disabling NX hugepages is a guest crashing > + * the system, ensure the userspace process has permission to > + * reboot the system. > + */ > + if (!capable(CAP_SYS_BOOT)) { > + r = -EPERM; > + break; > + } > kvm->arch.disable_nx_huge_pages = true; > kvm_update_nx_huge_pages(kvm); > r = 0; > diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h > index 72163ba2f878..4db8251c3ce5 100644 > --- a/tools/testing/selftests/kvm/include/kvm_util_base.h > +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h Can you split out the selftests changes to a separate commit? I have a feeling you meant to :). > @@ -411,4 +411,6 @@ uint64_t vm_get_single_stat(struct kvm_vm *vm, const char *stat_name); > > uint32_t guest_get_vcpuid(void); > > +void vm_disable_nx_huge_pages(struct kvm_vm *vm); > + > #endif /* SELFTEST_KVM_UTIL_BASE_H */ > diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c > index 9d72d1bb34fa..46a7fa08d3e0 100644 > --- a/tools/testing/selftests/kvm/lib/kvm_util.c > +++ b/tools/testing/selftests/kvm/lib/kvm_util.c > @@ -2765,3 +2765,10 @@ uint64_t vm_get_single_stat(struct kvm_vm *vm, const char *stat_name) > return value; > } > > +void vm_disable_nx_huge_pages(struct kvm_vm *vm) > +{ > + struct kvm_enable_cap cap = { 0 }; > + > + cap.cap = KVM_CAP_VM_DISABLE_NX_HUGE_PAGES; > + vm_enable_cap(vm, &cap); > +} > diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c > index 2bcbe4efdc6a..5ce98f759bc8 100644 > --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c > +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c Will you add a test to exercise the 
CAP_SYS_BOOT check? At minimum the selftest should check if it has CAP_SYS_BOOT and act accordingly (e.g. exiting with KSFT_SKIP). > @@ -57,13 +57,40 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits) > expected_splits, actual_splits); > } > > +static void help(void) > +{ > + puts(""); > + printf("usage: nx_huge_pages_test.sh [-x]\n"); > + puts(""); > + printf(" -x: Allow executable huge pages on the VM.\n"); > + puts(""); > + exit(0); > +} > + > int main(int argc, char **argv) > { > struct kvm_vm *vm; > struct timespec ts; > + bool disable_nx = false; > + int opt; > + > + while ((opt = getopt(argc, argv, "x")) != -1) { > + switch (opt) { > + case 'x': > + disable_nx = true; > + break; > + case 'h': > + default: > + help(); > + break; > + } > + } > > vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); > > + if (disable_nx) > + vm_disable_nx_huge_pages(vm); > + > vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB, > HPAGE_PADDR_START, HPAGE_SLOT, > HPAGE_SLOT_NPAGES, 0); > @@ -83,21 +110,21 @@ int main(int argc, char **argv) > * at 2M. > */ > run_guest_code(vm, guest_code0); > - check_2m_page_count(vm, 2); > - check_split_count(vm, 2); > + check_2m_page_count(vm, disable_nx ? 4 : 2); > + check_split_count(vm, disable_nx ? 0 : 2); > > /* > * guest_code1 is in the same huge page as data1, so it will cause > * that huge page to be remapped at 4k. > */ > run_guest_code(vm, guest_code1); > - check_2m_page_count(vm, 1); > - check_split_count(vm, 3); > + check_2m_page_count(vm, disable_nx ? 4 : 1); > + check_split_count(vm, disable_nx ? 0 : 3); > > /* Run guest_code0 again to check that is has no effect. */ > run_guest_code(vm, guest_code0); > - check_2m_page_count(vm, 1); > - check_split_count(vm, 3); > + check_2m_page_count(vm, disable_nx ? 4 : 1); > + check_split_count(vm, disable_nx ? 0 : 3); > > /* > * Give recovery thread time to run. 
The wrapper script sets > @@ -110,7 +137,7 @@ int main(int argc, char **argv) > /* > * Now that the reclaimer has run, all the split pages should be gone. > */ > - check_2m_page_count(vm, 1); > + check_2m_page_count(vm, disable_nx ? 4 : 1); > check_split_count(vm, 0); > > /* > @@ -118,13 +145,13 @@ int main(int argc, char **argv) > * again to check that pages are mapped at 2M again. > */ > run_guest_code(vm, guest_code0); > - check_2m_page_count(vm, 2); > - check_split_count(vm, 2); > + check_2m_page_count(vm, disable_nx ? 4 : 2); > + check_split_count(vm, disable_nx ? 0 : 2); > > /* Pages are once again split from running guest_code1. */ > run_guest_code(vm, guest_code1); > - check_2m_page_count(vm, 1); > - check_split_count(vm, 3); > + check_2m_page_count(vm, disable_nx ? 4 : 1); > + check_split_count(vm, disable_nx ? 0 : 3); > > kvm_vm_free(vm); > > diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh > index 19fc95723fcb..29f999f48848 100755 > --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh > +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh > @@ -14,7 +14,7 @@ echo 1 > /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio > echo 100 > /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms > echo 200 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages > > -./nx_huge_pages_test > +./nx_huge_pages_test "${@}" > RET=$? > > echo $NX_HUGE_PAGES > /sys/module/kvm/parameters/nx_huge_pages > -- > 2.35.1.894.gb6a874cedc-goog >
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 74351cbb9b5b..995f30667619 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4256,7 +4256,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SYS_ATTRIBUTES: case KVM_CAP_VAPIC: case KVM_CAP_ENABLE_CAP: - case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: @@ -4359,6 +4358,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_DISABLE_QUIRKS2: r = KVM_X86_VALID_QUIRKS; break; + case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: + /* + * Since the risk of disabling NX hugepages is a guest crashing + * the system, ensure the userspace process has permission to + * reboot the system. + */ + r = capable(CAP_SYS_BOOT); + break; default: break; } @@ -6050,6 +6057,15 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, mutex_unlock(&kvm->lock); break; case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: + /* + * Since the risk of disabling NX hugepages is a guest crashing + * the system, ensure the userspace process has permission to + * reboot the system. 
+ */ + if (!capable(CAP_SYS_BOOT)) { + r = -EPERM; + break; + } kvm->arch.disable_nx_huge_pages = true; kvm_update_nx_huge_pages(kvm); r = 0; diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 72163ba2f878..4db8251c3ce5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -411,4 +411,6 @@ uint64_t vm_get_single_stat(struct kvm_vm *vm, const char *stat_name); uint32_t guest_get_vcpuid(void); +void vm_disable_nx_huge_pages(struct kvm_vm *vm); + #endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9d72d1bb34fa..46a7fa08d3e0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2765,3 +2765,10 @@ uint64_t vm_get_single_stat(struct kvm_vm *vm, const char *stat_name) return value; } +void vm_disable_nx_huge_pages(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = { 0 }; + + cap.cap = KVM_CAP_VM_DISABLE_NX_HUGE_PAGES; + vm_enable_cap(vm, &cap); +} diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 2bcbe4efdc6a..5ce98f759bc8 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -57,13 +57,40 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits) expected_splits, actual_splits); } +static void help(void) +{ + puts(""); + printf("usage: nx_huge_pages_test.sh [-x]\n"); + puts(""); + printf(" -x: Allow executable huge pages on the VM.\n"); + puts(""); + exit(0); +} + int main(int argc, char **argv) { struct kvm_vm *vm; struct timespec ts; + bool disable_nx = false; + int opt; + + while ((opt = getopt(argc, argv, "x")) != -1) { + switch (opt) { + case 'x': + disable_nx = true; + break; + case 'h': + default: + 
help(); + break; + } + } vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + if (disable_nx) + vm_disable_nx_huge_pages(vm); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB, HPAGE_PADDR_START, HPAGE_SLOT, HPAGE_SLOT_NPAGES, 0); @@ -83,21 +110,21 @@ int main(int argc, char **argv) * at 2M. */ run_guest_code(vm, guest_code0); - check_2m_page_count(vm, 2); - check_split_count(vm, 2); + check_2m_page_count(vm, disable_nx ? 4 : 2); + check_split_count(vm, disable_nx ? 0 : 2); /* * guest_code1 is in the same huge page as data1, so it will cause * that huge page to be remapped at 4k. */ run_guest_code(vm, guest_code1); - check_2m_page_count(vm, 1); - check_split_count(vm, 3); + check_2m_page_count(vm, disable_nx ? 4 : 1); + check_split_count(vm, disable_nx ? 0 : 3); /* Run guest_code0 again to check that is has no effect. */ run_guest_code(vm, guest_code0); - check_2m_page_count(vm, 1); - check_split_count(vm, 3); + check_2m_page_count(vm, disable_nx ? 4 : 1); + check_split_count(vm, disable_nx ? 0 : 3); /* * Give recovery thread time to run. The wrapper script sets @@ -110,7 +137,7 @@ int main(int argc, char **argv) /* * Now that the reclaimer has run, all the split pages should be gone. */ - check_2m_page_count(vm, 1); + check_2m_page_count(vm, disable_nx ? 4 : 1); check_split_count(vm, 0); /* @@ -118,13 +145,13 @@ int main(int argc, char **argv) * again to check that pages are mapped at 2M again. */ run_guest_code(vm, guest_code0); - check_2m_page_count(vm, 2); - check_split_count(vm, 2); + check_2m_page_count(vm, disable_nx ? 4 : 2); + check_split_count(vm, disable_nx ? 0 : 2); /* Pages are once again split from running guest_code1. */ run_guest_code(vm, guest_code1); - check_2m_page_count(vm, 1); - check_split_count(vm, 3); + check_2m_page_count(vm, disable_nx ? 4 : 1); + check_split_count(vm, disable_nx ? 
0 : 3); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh index 19fc95723fcb..29f999f48848 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh @@ -14,7 +14,7 @@ echo 1 > /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio echo 100 > /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms echo 200 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages -./nx_huge_pages_test +./nx_huge_pages_test "${@}" RET=$? echo $NX_HUGE_PAGES > /sys/module/kvm/parameters/nx_huge_pages
Ensure that the userspace actor attempting to disable NX hugepages has
permission to reboot the system. Since disabling NX hugepages would
allow a guest to crash the system, it is similar to reboot permissions.

This approach is the simplest permission gating, but passing a file
descriptor opened for write for the module parameter would also work
well and be more precise.
The latter approach was suggested by Sean Christopherson.

Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/x86.c                            | 18 ++++++-
 .../selftests/kvm/include/kvm_util_base.h     |  2 +
 tools/testing/selftests/kvm/lib/kvm_util.c    |  7 +++
 .../selftests/kvm/x86_64/nx_huge_pages_test.c | 49 ++++++++++++++-----
 .../kvm/x86_64/nx_huge_pages_test.sh          |  2 +-
 5 files changed, 65 insertions(+), 13 deletions(-)