
[5/6] KVM: x86: Implement kvm_arch_vcpu_pre_fault_memory()

Message ID: 20240419085927.3648704-6-pbonzini@redhat.com
State: New, archived
Series: KVM: Guest Memory Pre-Population API

Commit Message

Paolo Bonzini April 19, 2024, 8:59 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Wire the KVM_PRE_FAULT_MEMORY ioctl to __kvm_mmu_do_page_fault() to populate
guest memory.  It can be called right after KVM_CREATE_VCPU creates a vCPU,
since at that point kvm_mmu_create() and kvm_init_mmu() have been called and
the vCPU is ready to invoke the KVM page fault handler.

The helper function kvm_tdp_map_page() takes care of the logic to
process RET_PF_* return values and convert them to success or errno.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Message-ID: <9b866a0ae7147f96571c439e75429a03dcb659b6.1712785629.git.isaku.yamahata@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/Kconfig   |  1 +
 arch/x86/kvm/mmu/mmu.c | 72 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c     |  3 ++
 3 files changed, 76 insertions(+)
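
As context for the ioctl being wired up here, below is a minimal sketch of how
userspace might drive KVM_PRE_FAULT_MEMORY, assuming the uapi struct, ioctl
number and capability introduced earlier in this series; the retry loop follows
the in/out semantics of the struct described there (error handling abbreviated):

#include <errno.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Pre-fault [gpa, gpa + size) on a vCPU; can be called as soon as the
 * vCPU exists.  struct kvm_pre_fault_memory is in/out: the kernel
 * advances gpa and shrinks size as pages are mapped.
 */
static int pre_fault_range(int vm_fd, int vcpu_fd, __u64 gpa, __u64 size)
{
	struct kvm_pre_fault_memory range = {
		.gpa = gpa,
		.size = size,
		.flags = 0,
	};

	/* On x86 this reports tdp_enabled, see the x86.c hunk below. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PRE_FAULT_MEMORY) <= 0)
		return -EOPNOTSUPP;

	while (range.size) {
		if (ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range) < 0) {
			if (errno == EINTR || errno == EAGAIN)
				continue;	/* retry with updated gpa/size */
			return -errno;
		}
	}
	return 0;
}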

Comments

Xiaoyao Li April 22, 2024, 3:37 p.m. UTC | #1
On 4/19/2024 4:59 PM, Paolo Bonzini wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Wire the KVM_PRE_FAULT_MEMORY ioctl to __kvm_mmu_do_page_fault() to populate
> guest memory.  It can be called right after KVM_CREATE_VCPU creates a vCPU,
> since at that point kvm_mmu_create() and kvm_init_mmu() have been called and
> the vCPU is ready to invoke the KVM page fault handler.
> 
> The helper function kvm_tdp_map_page() takes care of the logic to
> process RET_PF_* return values and convert them to success or errno.
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Message-ID: <9b866a0ae7147f96571c439e75429a03dcb659b6.1712785629.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   arch/x86/kvm/Kconfig   |  1 +
>   arch/x86/kvm/mmu/mmu.c | 72 ++++++++++++++++++++++++++++++++++++++++++
>   arch/x86/kvm/x86.c     |  3 ++
>   3 files changed, 76 insertions(+)
> 
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 7632fe6e4db9..54c155432793 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -44,6 +44,7 @@ config KVM
>   	select KVM_VFIO
>   	select HAVE_KVM_PM_NOTIFIER if PM
>   	select KVM_GENERIC_HARDWARE_ENABLING
> +	select KVM_GENERIC_PRE_FAULT_MEMORY
>   	help
>   	  Support hosting fully virtualized guest machines using hardware
>   	  virtualization extensions.  You will need a fairly recent
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 10e90788b263..a045b23964c0 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4647,6 +4647,78 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>   	return direct_page_fault(vcpu, fault);
>   }
>   
> +static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
> +		     u8 *level)
> +{
> +	int r;
> +
> +	/* Restrict to TDP page fault. */
> +	if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault)
> +		return -EOPNOTSUPP;
> +
> +retry:
> +	r = __kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level);
> +	if (r < 0)
> +		return r;
> +
> +	switch (r) {
> +	case RET_PF_RETRY:
> +		if (signal_pending(current))
> +			return -EINTR;
> +		cond_resched();
> +		goto retry;
> +
> +	case RET_PF_FIXED:
> +	case RET_PF_SPURIOUS:
> +		break;
> +
> +	case RET_PF_EMULATE:
> +		return -ENOENT;
> +
> +	case RET_PF_CONTINUE:
> +	case RET_PF_INVALID:
> +	default:
> +		WARN_ON_ONCE(r);
> +		return -EIO;

Need to update patch 1 for -EIO

> +	}
> +
> +	return 0;
> +}
> +
> +long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
> +				    struct kvm_pre_fault_memory *range)
> +{
> +	u64 error_code = PFERR_GUEST_FINAL_MASK;
> +	u8 level = PG_LEVEL_4K;
> +	u64 end;
> +	int r;
> +
> +	/*
> +	 * reload is efficient when called repeatedly, so we can do it on
> +	 * every iteration.
> +	 */
> +	kvm_mmu_reload(vcpu);
> +
> +	if (kvm_arch_has_private_mem(vcpu->kvm) &&
> +	    kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa)))
> +		error_code |= PFERR_PRIVATE_ACCESS;
> +
> +	/*
> +	 * Shadow paging uses GVA for kvm page fault, so restrict to
> +	 * two-dimensional paging.
> +	 */
> +	r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level);
> +	if (r < 0)
> +		return r;
> +
> +	/*
> +	 * If the mapping that covers range->gpa can use a huge page, it
> +	 * may start below it or end after range->gpa + range->size.
> +	 */
> +	end = (range->gpa & KVM_HPAGE_MASK(level)) + KVM_HPAGE_SIZE(level);
> +	return min(range->size, end - range->gpa);
> +}
> +
>   static void nonpaging_init_context(struct kvm_mmu *context)
>   {
>   	context->page_fault = nonpaging_page_fault;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 83b8260443a3..619ad713254e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4715,6 +4715,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>   	case KVM_CAP_MEMORY_FAULT_INFO:
>   		r = 1;
>   		break;
> +	case KVM_CAP_PRE_FAULT_MEMORY:
> +		r = tdp_enabled;
> +		break;
>   	case KVM_CAP_EXIT_HYPERCALL:
>   		r = KVM_EXIT_HYPERCALL_VALID_MASK;
>   		break;
Sean Christopherson June 12, 2024, 9:02 p.m. UTC | #2
On Mon, Apr 22, 2024, Xiaoyao Li wrote:
> On 4/19/2024 4:59 PM, Paolo Bonzini wrote:
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 10e90788b263..a045b23964c0 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -4647,6 +4647,78 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> >   	return direct_page_fault(vcpu, fault);
> >   }
> > +static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
> > +		     u8 *level)

Align parameters:

static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
			    u8 *level)

> > +{
> > +	int r;
> > +
> > +	/* Restrict to TDP page fault. */

This is fairly obvious from the code, what might not be obvious is _why_.  I'm
also ok dropping the comment entirely, but it's easy enough to provide a hint to
the reader.

> > +	if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault)
> > +		return -EOPNOTSUPP;
> > +
> > +retry:
> > +	r = __kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level);
> > +	if (r < 0)
> > +		return r;
> > +
> > +	switch (r) {
> > +	case RET_PF_RETRY:
> > +		if (signal_pending(current))
> > +			return -EINTR;
> > +		cond_resched();
> > +		goto retry;

Rather than a goto+retry from inside a switch statement, what about:

	int r;

	/* 
	 * Pre-faulting a GPA is supported only non-nested TDP, as indirect
	 * MMUs map either GVAs or L2 GPAs, not L1 GPAs.
	 */
	if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault)
		return -EOPNOTSUPP;

	do {
		if (signal_pending(current))
			return -EINTR;

		cond_resched();

		r = kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level);
	} while (r == RET_PF_RETRY);

	switch (r) {
	case RET_PF_FIXED:
	case RET_PF_SPURIOUS:
		break;

	case RET_PF_EMULATE:
		return -ENOENT;

	case RET_PF_CONTINUE:
	case RET_PF_INVALID:
	case RET_PF_RETRY:
	default:
		WARN_ON_ONCE(r >= 0);
		return -EIO;
	}

	return 0;
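
For reference, the byte count returned by kvm_arch_vcpu_pre_fault_memory() is
consumed by the generic per-vCPU loop added earlier in the series; a paraphrased
sketch of that caller (not the exact patch 2 code) shows why Xiaoyao's note
about documenting -EIO applies to the generic path as well:

static int kvm_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
				     struct kvm_pre_fault_memory *range)
{
	long r;

	do {
		if (signal_pending(current))
			return -EINTR;

		r = kvm_arch_vcpu_pre_fault_memory(vcpu, range);
		if (r < 0)
			return r;

		/* Mapping zero bytes, or more than asked for, is a bug. */
		if (WARN_ON_ONCE(r == 0 || r > range->size))
			return -EIO;

		range->gpa  += r;
		range->size -= r;
		cond_resched();
	} while (range->size);

	return 0;
}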

Patch

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 7632fe6e4db9..54c155432793 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -44,6 +44,7 @@ config KVM
 	select KVM_VFIO
 	select HAVE_KVM_PM_NOTIFIER if PM
 	select KVM_GENERIC_HARDWARE_ENABLING
+	select KVM_GENERIC_PRE_FAULT_MEMORY
 	help
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 10e90788b263..a045b23964c0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4647,6 +4647,78 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	return direct_page_fault(vcpu, fault);
 }
 
+static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
+		     u8 *level)
+{
+	int r;
+
+	/* Restrict to TDP page fault. */
+	if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault)
+		return -EOPNOTSUPP;
+
+retry:
+	r = __kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level);
+	if (r < 0)
+		return r;
+
+	switch (r) {
+	case RET_PF_RETRY:
+		if (signal_pending(current))
+			return -EINTR;
+		cond_resched();
+		goto retry;
+
+	case RET_PF_FIXED:
+	case RET_PF_SPURIOUS:
+		break;
+
+	case RET_PF_EMULATE:
+		return -ENOENT;
+
+	case RET_PF_CONTINUE:
+	case RET_PF_INVALID:
+	default:
+		WARN_ON_ONCE(r);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
+				    struct kvm_pre_fault_memory *range)
+{
+	u64 error_code = PFERR_GUEST_FINAL_MASK;
+	u8 level = PG_LEVEL_4K;
+	u64 end;
+	int r;
+
+	/*
+	 * reload is efficient when called repeatedly, so we can do it on
+	 * every iteration.
+	 */
+	kvm_mmu_reload(vcpu);
+
+	if (kvm_arch_has_private_mem(vcpu->kvm) &&
+	    kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa)))
+		error_code |= PFERR_PRIVATE_ACCESS;
+
+	/*
+	 * Shadow paging uses GVA for kvm page fault, so restrict to
+	 * two-dimensional paging.
+	 */
+	r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level);
+	if (r < 0)
+		return r;
+
+	/*
+	 * If the mapping that covers range->gpa can use a huge page, it
+	 * may start below it or end after range->gpa + range->size.
+	 */
+	end = (range->gpa & KVM_HPAGE_MASK(level)) + KVM_HPAGE_SIZE(level);
+	return min(range->size, end - range->gpa);
+}
+
 static void nonpaging_init_context(struct kvm_mmu *context)
 {
 	context->page_fault = nonpaging_page_fault;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 83b8260443a3..619ad713254e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4715,6 +4715,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MEMORY_FAULT_INFO:
 		r = 1;
 		break;
+	case KVM_CAP_PRE_FAULT_MEMORY:
+		r = tdp_enabled;
+		break;
 	case KVM_CAP_EXIT_HYPERCALL:
 		r = KVM_EXIT_HYPERCALL_VALID_MASK;
 		break;
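
Finally, to make the huge-page rounding at the end of
kvm_arch_vcpu_pre_fault_memory() concrete, a standalone worked example with
illustrative values (the KVM_HPAGE_* macros are re-derived locally for a 2MiB
page rather than taken from kernel headers):

#include <assert.h>
#include <stdint.h>

/*
 * Worked example of the return-value computation in
 * kvm_arch_vcpu_pre_fault_memory(), with illustrative values.
 */
int main(void)
{
	const uint64_t hpage_size = 1ULL << 21;		/* KVM_HPAGE_SIZE(PG_LEVEL_2M) */
	const uint64_t hpage_mask = ~(hpage_size - 1);	/* KVM_HPAGE_MASK(PG_LEVEL_2M) */
	const uint64_t gpa  = 0x201000;	/* request starts inside a 2MiB page */
	const uint64_t size = 0x100000;	/* 1MiB requested */

	/*
	 * The 2MiB mapping covers [0x200000, 0x400000), so it may start
	 * below gpa and end beyond gpa + size.
	 */
	uint64_t end = (gpa & hpage_mask) + hpage_size;
	assert(end == 0x400000);

	/* Report only the requested bytes actually covered: min(size, end - gpa). */
	uint64_t ret = size < end - gpa ? size : end - gpa;
	assert(ret == 0x100000);	/* the whole 1MiB request was covered */

	return 0;
}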