Message ID | 20240419085927.3648704-6-pbonzini@redhat.com |
---|---|
State | New, archived |
Series | KVM: Guest Memory Pre-Population API |
On 4/19/2024 4:59 PM, Paolo Bonzini wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Wire KVM_PRE_FAULT_MEMORY ioctl to __kvm_mmu_do_page_fault() to populate guest
> memory. It can be called right after KVM_CREATE_VCPU creates a vCPU,
> since at that point kvm_mmu_create() and kvm_init_mmu() are called and
> the vCPU is ready to invoke the KVM page fault handler.
>
> The helper function kvm_tdp_map_page() takes care of the logic to
> process RET_PF_* return values and convert them to success or errno.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Message-ID: <9b866a0ae7147f96571c439e75429a03dcb659b6.1712785629.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/Kconfig   |  1 +
>  arch/x86/kvm/mmu/mmu.c | 72 ++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c     |  3 ++
>  3 files changed, 76 insertions(+)
>
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 7632fe6e4db9..54c155432793 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -44,6 +44,7 @@ config KVM
>  	select KVM_VFIO
>  	select HAVE_KVM_PM_NOTIFIER if PM
>  	select KVM_GENERIC_HARDWARE_ENABLING
> +	select KVM_GENERIC_PRE_FAULT_MEMORY
>  	help
>  	  Support hosting fully virtualized guest machines using hardware
>  	  virtualization extensions.  You will need a fairly recent
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 10e90788b263..a045b23964c0 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4647,6 +4647,78 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  	return direct_page_fault(vcpu, fault);
>  }
>
> +static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
> +		u8 *level)
> +{
> +	int r;
> +
> +	/* Restrict to TDP page fault. */
> +	if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault)
> +		return -EOPNOTSUPP;
> +
> +retry:
> +	r = __kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level);
> +	if (r < 0)
> +		return r;
> +
> +	switch (r) {
> +	case RET_PF_RETRY:
> +		if (signal_pending(current))
> +			return -EINTR;
> +		cond_resched();
> +		goto retry;
> +
> +	case RET_PF_FIXED:
> +	case RET_PF_SPURIOUS:
> +		break;
> +
> +	case RET_PF_EMULATE:
> +		return -ENOENT;
> +
> +	case RET_PF_CONTINUE:
> +	case RET_PF_INVALID:
> +	default:
> +		WARN_ON_ONCE(r);
> +		return -EIO;

Need to update patch 1 for -EIO

> +	}
> +
> +	return 0;
> +}
> +
> +long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
> +				    struct kvm_pre_fault_memory *range)
> +{
> +	u64 error_code = PFERR_GUEST_FINAL_MASK;
> +	u8 level = PG_LEVEL_4K;
> +	u64 end;
> +	int r;
> +
> +	/*
> +	 * reload is efficient when called repeatedly, so we can do it on
> +	 * every iteration.
> +	 */
> +	kvm_mmu_reload(vcpu);
> +
> +	if (kvm_arch_has_private_mem(vcpu->kvm) &&
> +	    kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa)))
> +		error_code |= PFERR_PRIVATE_ACCESS;
> +
> +	/*
> +	 * Shadow paging uses GVA for kvm page fault, so restrict to
> +	 * two-dimensional paging.
> +	 */
> +	r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level);
> +	if (r < 0)
> +		return r;
> +
> +	/*
> +	 * If the mapping that covers range->gpa can use a huge page, it
> +	 * may start below it or end after range->gpa + range->size.
> +	 */
> +	end = (range->gpa & KVM_HPAGE_MASK(level)) + KVM_HPAGE_SIZE(level);
> +	return min(range->size, end - range->gpa);
> +}
> +
>  static void nonpaging_init_context(struct kvm_mmu *context)
>  {
>  	context->page_fault = nonpaging_page_fault;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 83b8260443a3..619ad713254e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4715,6 +4715,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_MEMORY_FAULT_INFO:
>  		r = 1;
>  		break;
> +	case KVM_CAP_PRE_FAULT_MEMORY:
> +		r = tdp_enabled;
> +		break;
>  	case KVM_CAP_EXIT_HYPERCALL:
>  		r = KVM_EXIT_HYPERCALL_VALID_MASK;
>  		break;
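As a reading aid, here is a minimal sketch of how userspace might drive the new ioctl described above, right after vCPU creation and before the first KVM_RUN. The struct layout and the partial-progress/retry semantics come from patch 1 of the series (not shown in this mail), so treat them as assumptions rather than the final uAPI; the helper name is made up for illustration.

/*
 * Hypothetical userspace sketch (not part of this patch): pre-populate a
 * guest physical range before the first KVM_RUN.  Assumes the
 * KVM_PRE_FAULT_MEMORY definition and struct kvm_pre_fault_memory from
 * patch 1 of this series; error handling is deliberately minimal.
 */
#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int pre_fault_range(int vcpu_fd, uint64_t gpa, uint64_t size)
{
	struct kvm_pre_fault_memory range = {
		.gpa  = gpa,
		.size = size,
	};

	/* The kernel advances .gpa and shrinks .size as pages get mapped. */
	while (range.size) {
		if (ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range) < 0) {
			if (errno == EINTR || errno == EAGAIN)
				continue;	/* interrupted; just retry */
			return -errno;		/* e.g. -ENOENT, -EOPNOTSUPP */
		}
	}

	return 0;
}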
On Mon, Apr 22, 2024, Xiaoyao Li wrote:
> On 4/19/2024 4:59 PM, Paolo Bonzini wrote:
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 10e90788b263..a045b23964c0 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -4647,6 +4647,78 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> >  	return direct_page_fault(vcpu, fault);
> >  }
> > +static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
> > +		u8 *level)

Align parameters:

static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
			    u8 *level)

> > +{
> > +	int r;
> > +
> > +	/* Restrict to TDP page fault. */

This is fairly obvious from the code, what might not be obvious is _why_.
I'm also ok dropping the comment entirely, but it's easy enough to provide
a hint to the reader.

> > +	if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault)
> > +		return -EOPNOTSUPP;
> > +
> > +retry:
> > +	r = __kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level);
> > +	if (r < 0)
> > +		return r;
> > +
> > +	switch (r) {
> > +	case RET_PF_RETRY:
> > +		if (signal_pending(current))
> > +			return -EINTR;
> > +		cond_resched();
> > +		goto retry;

Rather than a goto+retry from inside a switch statement, what about:

	int r;

	/*
	 * Pre-faulting a GPA is supported only for non-nested TDP, as indirect
	 * MMUs map either GVAs or L2 GPAs, not L1 GPAs.
	 */
	if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault)
		return -EOPNOTSUPP;

	do {
		if (signal_pending(current))
			return -EINTR;

		cond_resched();
		r = kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level);
	} while (r == RET_PF_RETRY);

	switch (r) {
	case RET_PF_FIXED:
	case RET_PF_SPURIOUS:
		break;

	case RET_PF_EMULATE:
		return -ENOENT;

	case RET_PF_CONTINUE:
	case RET_PF_INVALID:
	case RET_PF_RETRY:
	default:
		WARN_ON_ONCE(r >= 0);
		return -EIO;
	}

	return 0;
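For context on why kvm_arch_vcpu_pre_fault_memory() returns a byte count capped at the huge-page boundary, and why the -EIO convention has to line up with patch 1 as noted above, here is a rough sketch of what the generic per-vCPU loop from patch 1 presumably looks like. The function name and exact error handling are assumptions for illustration, not the series' actual code.

/*
 * Hypothetical sketch of the generic KVM_PRE_FAULT_MEMORY loop from patch 1
 * (not shown in this mail); names and error handling are assumptions.  The
 * arch hook reports how many bytes the installed mapping covers, so a single
 * huge page can satisfy several pages of the request in one call.
 */
static int kvm_vcpu_pre_fault_memory_sketch(struct kvm_vcpu *vcpu,
					    struct kvm_pre_fault_memory *range)
{
	long r;

	while (range->size) {
		if (signal_pending(current))
			return -EINTR;

		r = kvm_arch_vcpu_pre_fault_memory(vcpu, range);
		if (r < 0)
			return r;	/* -ENOENT, -EOPNOTSUPP, -EIO, ... */

		/* Progress is guaranteed: 0 < r <= range->size. */
		range->gpa  += r;
		range->size -= r;
		cond_resched();
	}

	return 0;
}

For example, if range->gpa is 2MiB-aligned and the fault is mapped with a 2MiB page, the arch hook returns min(range->size, 2MiB) and the loop skips the rest of that huge page in a single iteration.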