
[v15,13/20] KVM: SEV: Implement gmem hook for initializing private pages

Message ID 20240501085210.2213060-14-michael.roth@amd.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support

Commit Message

Michael Roth May 1, 2024, 8:52 a.m. UTC
This will handle the RMP table updates needed to put a page into a
private state before mapping it into an SEV-SNP guest.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
---
 arch/x86/kvm/Kconfig   |  1 +
 arch/x86/kvm/svm/sev.c | 98 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c |  2 +
 arch/x86/kvm/svm/svm.h |  5 +++
 arch/x86/kvm/x86.c     |  5 +++
 virt/kvm/guest_memfd.c |  4 +-
 6 files changed, 113 insertions(+), 2 deletions(-)

Comments

Kai Huang May 20, 2024, 10:16 a.m. UTC | #1
On Wed, 2024-05-01 at 03:52 -0500, Michael Roth wrote:
> This will handle the RMP table updates needed to put a page into a
> private state before mapping it into an SEV-SNP guest.
> 
> 

[...]

> +int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	kvm_pfn_t pfn_aligned;
> +	gfn_t gfn_aligned;
> +	int level, rc;
> +	bool assigned;
> +
> +	if (!sev_snp_guest(kvm))
> +		return 0;
> +
> +	rc = snp_lookup_rmpentry(pfn, &assigned, &level);
> +	if (rc) {
> +		pr_err_ratelimited("SEV: Failed to look up RMP entry: GFN %llx PFN %llx error %d\n",
> +				   gfn, pfn, rc);
> +		return -ENOENT;
> +	}
> +
> +	if (assigned) {
> +		pr_debug("%s: already assigned: gfn %llx pfn %llx max_order %d level %d\n",
> +			 __func__, gfn, pfn, max_order, level);
> +		return 0;
> +	}
> +
> +	if (is_large_rmp_possible(kvm, pfn, max_order)) {
> +		level = PG_LEVEL_2M;
> +		pfn_aligned = ALIGN_DOWN(pfn, PTRS_PER_PMD);
> +		gfn_aligned = ALIGN_DOWN(gfn, PTRS_PER_PMD);
> +	} else {
> +		level = PG_LEVEL_4K;
> +		pfn_aligned = pfn;
> +		gfn_aligned = gfn;
> +	}
> +
> +	rc = rmp_make_private(pfn_aligned, gfn_to_gpa(gfn_aligned), level, sev->asid, false);
> +	if (rc) {
> +		pr_err_ratelimited("SEV: Failed to update RMP entry: GFN %llx PFN %llx level %d error %d\n",
> +				   gfn, pfn, level, rc);
> +		return -EINVAL;
> +	}
> +
> +	pr_debug("%s: updated: gfn %llx pfn %llx pfn_aligned %llx max_order %d level %d\n",
> +		 __func__, gfn, pfn, pfn_aligned, max_order, level);
> +
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index b70556608e8d..60783e9f2ae8 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -5085,6 +5085,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>  	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
>  	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
>  	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
> +
> +	.gmem_prepare = sev_gmem_prepare,
>  };
>  
> 

+Rick, Isaku,

I am wondering whether this can be done in the KVM page fault handler?

The reason that I am asking is KVM will introduce several new
kvm_x86_ops::xx_private_spte() ops for TDX to handle setting up the
private mapping, and I am wondering whether SNP can just reuse some of
them so we can avoid having this .gmem_prepare():

        /* Add a page as page table page into private page table */
        int (*link_private_spt)(struct kvm *kvm, gfn_t gfn,
                                enum pg_level level, void *private_spt);

        /*
         * Free a page table page of private page table.
         * ...
         */
        int (*free_private_spt)(struct kvm *kvm, gfn_t gfn,
                                enum pg_level level, void *private_spt);

        /* Add a guest private page into private page table */
        int (*set_private_spte)(struct kvm *kvm, gfn_t gfn,
                                enum pg_level level, kvm_pfn_t pfn);

        /* Remove a guest private page from private page table */
        int (*remove_private_spte)(struct kvm *kvm, gfn_t gfn,
                                   enum pg_level level, kvm_pfn_t pfn);

        /*
         * Keep a guest private page mapped in private page table,
         * but clear its present bit
         */
        int (*zap_private_spte)(struct kvm *kvm, gfn_t gfn,
                                enum pg_level level);

The idea behind these is in the fault handler:

	bool use_private_pt = fault->is_private && 
			kvm_use_private_pt(kvm);

	root_pt = use_private_pt ? mmu->private_root_hpa : mmu->root_hpa;

	tdp_mmu_for_each_pte(&iter, root_pt, gfn, gfn+1, ..) {

		if (use_private_pt)
			kvm_x86_ops->xx_private_spte();
		else
			// normal TDP MMU ops
	}

Which means: if the fault is for a private GPA _AND_ the VM has a
separate private page table, use the specific xx_private_spte() ops to
handle the private mapping.

But I am thinking we can use those hooks for SNP too, because
"conceptually", SNP also has the concept of a "private GPA" and must at
least issue some command to update the RMP table when a private mapping
is set up or torn down.

So if we change the above logic to use fault->is_private, rather than
'use_private_pt', to decide whether to invoke the
kvm_x86_ops::xx_private_spte() hooks, then we can also implement the SNP
commands in those callbacks IIUC:

	if (fault->is_private && kvm_x86_ops::xx_private_spte())
		kvm_x86_ops::xx_private_spte();
	else
		// normal TDP MMU operation

For SNP, these callbacks will operate on the normal page table using the
normal TDP MMU code, but can do additional things like issuing RMP
commands, as shown in this patch.
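
As a rough illustration of that idea, an SNP-side callback might look
something like the sketch below.  This is hypothetical only: neither
sev_set_private_spte() nor the hook wiring exists in this series, and the
RMP handling is simply borrowed from sev_gmem_prepare() in this patch.

	/*
	 * Hypothetical sketch: an SNP implementation of a .set_private_spte()
	 * hook that issues the RMP update when a leaf SPTE for a private GFN
	 * is installed.  Assumes gfn/pfn are already aligned to 'level'.
	 */
	static int sev_set_private_spte(struct kvm *kvm, gfn_t gfn,
					enum pg_level level, kvm_pfn_t pfn)
	{
		struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
		int rc, rmp_level;
		bool assigned;

		if (!sev_snp_guest(kvm))
			return 0;

		/* Nothing to do if the PFN is already assigned to the ASID. */
		rc = snp_lookup_rmpentry(pfn, &assigned, &rmp_level);
		if (rc)
			return -ENOENT;
		if (assigned)
			return 0;

		/*
		 * The normal TDP MMU code installs the SPTE itself; only the
		 * RMP entry needs SNP-specific handling here.
		 */
		rc = rmp_make_private(pfn, gfn_to_gpa(gfn), level, sev->asid, false);
		return rc ? -EINVAL : 0;
	}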

My understanding is SNP doesn't need specific handling for mid-level
page tables, but should be able to utilize the ops when setting up /
tearing down a leaf SPTE?
Sean Christopherson May 20, 2024, 5:35 p.m. UTC | #2
On Mon, May 20, 2024, Kai Huang wrote:
> On Wed, 2024-05-01 at 03:52 -0500, Michael Roth wrote:
> > This will handle the RMP table updates needed to put a page into a
> > private state before mapping it into an SEV-SNP guest.
> > 
> > 
> 
> [...]
> 
> > +int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)

...

> +Rick, Isaku,
> 
> I am wondering whether this can be done in the KVM page fault handler?

No, because the state of a pfn in the RMP is tied to the guest_memfd inode, not
to the file descriptor, i.e. not to an individual VM.  And the NPT page tables
are treated as ephemeral for SNP.
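
For reference, in this series the preparation happens on the guest_memfd side
rather than in the fault path.  Per the guest_memfd and x86 hunks below, the
call chain is roughly:

	kvm_gmem_prepare_folio()                          /* virt/kvm/guest_memfd.c */
	  -> kvm_arch_gmem_prepare(kvm, gfn, pfn, order)  /* arch/x86/kvm/x86.c */
	       -> static_call(kvm_x86_gmem_prepare)       /* .gmem_prepare op */
	            -> sev_gmem_prepare()                 /* RMP update via rmp_make_private() */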
Isaku Yamahata May 20, 2024, 7:14 p.m. UTC | #3
On Mon, May 20, 2024 at 10:16:54AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> On Wed, 2024-05-01 at 03:52 -0500, Michael Roth wrote:
> > This will handle the RMP table updates needed to put a page into a
> > private state before mapping it into an SEV-SNP guest.
> > 
> > 
> 
> [...]
> 
> > +int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
> > +{
> > +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > +	kvm_pfn_t pfn_aligned;
> > +	gfn_t gfn_aligned;
> > +	int level, rc;
> > +	bool assigned;
> > +
> > +	if (!sev_snp_guest(kvm))
> > +		return 0;
> > +
> > +	rc = snp_lookup_rmpentry(pfn, &assigned, &level);
> > +	if (rc) {
> > +		pr_err_ratelimited("SEV: Failed to look up RMP entry: GFN %llx PFN %llx error %d\n",
> > +				   gfn, pfn, rc);
> > +		return -ENOENT;
> > +	}
> > +
> > +	if (assigned) {
> > +		pr_debug("%s: already assigned: gfn %llx pfn %llx max_order %d level %d\n",
> > +			 __func__, gfn, pfn, max_order, level);
> > +		return 0;
> > +	}
> > +
> > +	if (is_large_rmp_possible(kvm, pfn, max_order)) {
> > +		level = PG_LEVEL_2M;
> > +		pfn_aligned = ALIGN_DOWN(pfn, PTRS_PER_PMD);
> > +		gfn_aligned = ALIGN_DOWN(gfn, PTRS_PER_PMD);
> > +	} else {
> > +		level = PG_LEVEL_4K;
> > +		pfn_aligned = pfn;
> > +		gfn_aligned = gfn;
> > +	}
> > +
> > +	rc = rmp_make_private(pfn_aligned, gfn_to_gpa(gfn_aligned), level, sev->asid, false);
> > +	if (rc) {
> > +		pr_err_ratelimited("SEV: Failed to update RMP entry: GFN %llx PFN %llx level %d error %d\n",
> > +				   gfn, pfn, level, rc);
> > +		return -EINVAL;
> > +	}
> > +
> > +	pr_debug("%s: updated: gfn %llx pfn %llx pfn_aligned %llx max_order %d level %d\n",
> > +		 __func__, gfn, pfn, pfn_aligned, max_order, level);
> > +
> > +	return 0;
> > +}
> > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > index b70556608e8d..60783e9f2ae8 100644
> > --- a/arch/x86/kvm/svm/svm.c
> > +++ b/arch/x86/kvm/svm/svm.c
> > @@ -5085,6 +5085,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> >  	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
> >  	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
> >  	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
> > +
> > +	.gmem_prepare = sev_gmem_prepare,
> >  };
> >  
> > 
> 
> +Rick, Isaku,
> 
> I am wondering whether this can be done in the KVM page fault handler?
> 
> The reason that I am asking is KVM will introduce several new
> kvm_x86_ops::xx_private_spte() ops for TDX to handle setting up the
> private mapping, and I am wondering whether SNP can just reuse some of
> them so we can avoid having this .gmem_prepare():

Although I can't speak for the SNP folks, I guess those hooks don't make sense
for them.  I guess they want to stay away from directly modifying the TDP MMU
to add hooks there; instead, they intentionally chose to add hooks to
guest_memfd.  Maybe it's possible for SNP to use those hooks, but what's the
benefit for SNP?

If you're looking for the benefit of allowing the TDP MMU hooks to operate on
the shared page table, what about other VM types, e.g. SW_PROTECTED or future
ones?
Kai Huang May 20, 2024, 9:57 p.m. UTC | #4
On 21/05/2024 5:35 am, Sean Christopherson wrote:
> On Mon, May 20, 2024, Kai Huang wrote:
>> On Wed, 2024-05-01 at 03:52 -0500, Michael Roth wrote:
>>> This will handle the RMP table updates needed to put a page into a
>>> private state before mapping it into an SEV-SNP guest.
>>>
>>>
>>
>> [...]
>>
>>> +int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
> 
> ...
> 
>> +Rick, Isaku,
>>
>> I am wondering whether this can be done in the KVM page fault handler?
> 
> No, because the state of a pfn in the RMP is tied to the guest_memfd inode, not
> to the file descriptor, i.e. not to an individual VM.  

It's strange that the state of a PFN for SNP doesn't bind to an 
individual VM, at least for the private pages.  The command 
rmp_make_private() indeed reflects the mapping between PFN <-> <GFN, SSID>.

	rc = rmp_make_private(pfn_aligned, gfn_to_gpa(gfn_aligned),
			level, sev->asid, false);

> And the NPT page tables
> are treated as ephemeral for SNP.
> 

Do you mean private mappings for SNP guest can be zapped from the VM 
(the private pages are still there unchanged) and re-mapped later w/o 
needing to have guest's explicit acceptance?

If so, I think "we can zap" doesn't mean "we need to zap"?  Because the 
privates are now pinned anyway.  If we truly want to zap private 
mappings for SNP, IIUC it can be done by distinguishing whether a VM 
needs to use a separate private table, which is TDX-only.

I'll look into the SNP spec to understand more.
Sean Christopherson May 20, 2024, 11:15 p.m. UTC | #5
On Tue, May 21, 2024, Kai Huang wrote:
> On 21/05/2024 5:35 am, Sean Christopherson wrote:
> > On Mon, May 20, 2024, Kai Huang wrote:
> > > I am wondering whether this can be done in the KVM page fault handler?
> > 
> > No, because the state of a pfn in the RMP is tied to the guest_memfd inode,
> > not to the file descriptor, i.e. not to an individual VM.
> 
> It's strange that the state of a PFN for SNP doesn't bind to an individual VM,
> at least for the private pages.  The command rmp_make_private() indeed reflects
> the mapping between PFN <-> <GFN, SSID>.

s/SSID/ASID

KVM allows a single ASID to be bound to multiple "struct kvm" instances, e.g.
for intra-host migration.  If/when trusted I/O is a thing, presumably KVM will
also need to share the ASID with other entities, e.g. IOMMUFD.

> 	rc = rmp_make_private(pfn_aligned, gfn_to_gpa(gfn_aligned),
> 			level, sev->asid, false);
>
> > And the NPT page tables are treated as ephemeral for SNP.
> 
> Do you mean private mappings for SNP guest can be zapped from the VM (the
> private pages are still there unchanged) and re-mapped later w/o needing to
> have guest's explicit acceptance?

Correct.

> If so, I think "we can zap" doesn't mean "we need to zap"?

Correct.

> Because the privates are now pinned anyway.

Pinning is an orthogonal issue.  And it's not so much that the pfns are pinned
as it is that guest_memfd simply doesn't support page migration or swap at this
time.

Regardless of whether or not guest_memfd supports page migration, KVM needs to
track the state of the physical page in guest_memfd, e.g. if it's been assigned
to the ASID versus if it's still in a shared state.

> If we truly want to zap private mappings for SNP, IIUC it can be done by
> distinguishing whether a VM needs to use a separate private table, which is
> TDX-only.

I wouldn't say we "want" to zap private mappings for SNP, rather that it's a lot
less work to keep KVM's existing behavior (literally do nothing) than it is to
rework the MMU and whatnot to not zap SPTEs.  And there's no big motivation to
avoid zapping because SNP VMs are unlikely to delete memslots.

If it turns out that it's easy to preserve SNP mappings after TDX lands, then we
can certainly go that route, but AFAIK there's no reason to force the issue.
Kai Huang May 20, 2024, 11:41 p.m. UTC | #6
On 21/05/2024 11:15 am, Sean Christopherson wrote:
> On Tue, May 21, 2024, Kai Huang wrote:
>> On 21/05/2024 5:35 am, Sean Christopherson wrote:
>>> On Mon, May 20, 2024, Kai Huang wrote:
>>>> I am wondering whether this can be done in the KVM page fault handler?
>>>
>>> No, because the state of a pfn in the RMP is tied to the guest_memfd inode,
>>> not to the file descriptor, i.e. not to an individual VM.
>>
>> It's strange that the state of a PFN for SNP doesn't bind to an individual VM,
>> at least for the private pages.  The command rmp_make_private() indeed reflects
>> the mapping between PFN <-> <GFN, SSID>.
> 
> s/SSID/ASID
> 
> KVM allows a single ASID to be bound to multiple "struct kvm" instances, e.g.
> for intra-host migration.  If/when trusted I/O is a thing, presumably KVM will
> also need to share the ASID with other entities, e.g. IOMMUFD.

But is this the case for SNP?  I thought due to the nature of private 
pages, they cannot be shared between VMs?  So to me this RMP entry 
mapping for PFN <-> GFN for private page should just be per-VM.

> 
>> 	rc = rmp_make_private(pfn_aligned, gfn_to_gpa(gfn_aligned),
>> 			level, sev->asid, false);
>>
>>> And the NPT page tables are treated as ephemeral for SNP.
>>
>> Do you mean private mappings for SNP guest can be zapped from the VM (the
>> private pages are still there unchanged) and re-mapped later w/o needing to
>> have guest's explicit acceptance?
> 
> Correct.
> 
>> If so, I think "we can zap" doesn't mean "we need to zap"?
> 
> Correct.
> 
>> Because the privates are now pinned anyway.
> 
> Pinning is an orthogonal issue.  And it's not so much that the pfns are pinned
> as it is that guest_memfd simply doesn't support page migration or swap at this
> time.

Yes.

> 
> Regardless of whether or not guest_memfd supports page migration, KVM needs to
> track the state of the physical page in guest_memfd, e.g. if it's been assigned
> to the ASID versus if it's still in a shared state.

I am not certain this can impact whether we want to do RMP commands via 
guest_memfd() hooks or TDP MMU hooks?

> 
>> If we truly want to zap private mappings for SNP, IIUC it can be done by
>> distinguishing whether a VM needs to use a separate private table, which is
>> TDX-only.
> 
> I wouldn't say we "want" to zap private mappings for SNP, rather that it's a lot
> less work to keep KVM's existing behavior (literally do nothing) than it is to
> rework the MMU and whatnot to not zap SPTEs.  

My thinking too.

> And there's no big motivation to
> avoid zapping because SNP VMs are unlikely to delete memslots.

I think we should also consider MMU notifier?

> 
> If it turns out that it's easy to preserve SNP mappings after TDX lands, then we
> can certainly go that route, but AFAIK there's no reason to force the issue.

No I am certainly not saying we should do SNP after TDX.  Sorry I didn't 
closely monitor the status of this SNP patchset.

My intention is just to make the TDP MMU common code change more useful 
(since we need that for TDX anyway), i.e., not effectively just for TDX, 
if possible:

Currently the TDP MMU hooks are called depending on whether the page 
table type is private (or mirrored, whatever), but I think conceptually 
we should decide whether to call the TDP MMU hooks based on whether the 
faulting GPA is private _AND_ whether the hook is available.

https://lore.kernel.org/lkml/5e8119c0-31f5-4aa9-a496-4ae10bd745a3@intel.com/

If invoking SNP RMP commands is feasible in the TDP MMU hooks, then I 
think there's value in letting the SNP code use them too.  And we can 
simply split out one patch that only adds the TDP MMU hooks so SNP can 
land first.
Sean Christopherson May 21, 2024, 12:30 a.m. UTC | #7
On Tue, May 21, 2024, Kai Huang wrote:
> On 21/05/2024 11:15 am, Sean Christopherson wrote:
> > On Tue, May 21, 2024, Kai Huang wrote:
> > > On 21/05/2024 5:35 am, Sean Christopherson wrote:
> > > > On Mon, May 20, 2024, Kai Huang wrote:
> > > > > I am wondering whether this can be done in the KVM page fault handler?
> > > > 
> > > > No, because the state of a pfn in the RMP is tied to the guest_memfd inode,
> > > > not to the file descriptor, i.e. not to an individual VM.
> > > 
> > > It's strange that the state of a PFN for SNP doesn't bind to an individual VM,
> > > at least for the private pages.  The command rmp_make_private() indeed reflects
> > > the mapping between PFN <-> <GFN, SSID>.
> > 
> > s/SSID/ASID
> > 
> > KVM allows a single ASID to be bound to multiple "struct kvm" instances, e.g.
> > for intra-host migration.  If/when trusted I/O is a thing, presumably KVM will
> > also need to share the ASID with other entities, e.g. IOMMUFD.
> 
> But is this the case for SNP?  I thought due to the nature of private pages,
> they cannot be shared between VMs?  So to me this RMP entry mapping for PFN
> <-> GFN for private page should just be per-VM.

Sorry to redirect, but please read this mail (and probably surrounding mails).
It hopefully explains most of the question you have.

https://lore.kernel.org/all/ZLGiEfJZTyl7M8mS@google.com

> > Regardless of whether or not guest_memfd supports page migration, KVM needs to
> > track the state of the physical page in guest_memfd, e.g. if it's been assigned
> > to the ASID versus if it's still in a shared state.
> 
> I am not certain this can impact whether we want to do RMP commands via
> guest_memfd() hooks or TDP MMU hooks?
>
> > > If we truly want to zap private mappings for SNP, IIUC it can be done by
> > > distinguishing whether a VM needs to use a separate private table, which is
> > > TDX-only.
> > 
> > I wouldn't say we "want" to zap private mappings for SNP, rather that it's a lot
> > less work to keep KVM's existing behavior (literally do nothing) than it is to
> > rework the MMU and whatnot to not zap SPTEs.
> 
> My thinking too.
> 
> > And there's no big motivation to avoid zapping because SNP VMs are unlikely
> > to delete memslots.
> 
> I think we should also consider MMU notifier?

No, private mappings have no host userspace mappings, i.e. are completely exempt
from MMU notifier events.  guest_memfd() can still invalidate mappings, but that
only occurs if userspace punches a hole, which is destructive.

> > If it turns out that it's easy to preserve SNP mappings after TDX lands, then we
> > can certainly go that route, but AFAIK there's no reason to force the issue.
> 
> No I am certainly not saying we should do SNP after TDX.  Sorry I didn't
> closely monitor the status of this SNP patchset.
> 
> My intention is just to make the TDP MMU common code change more useful
> (since we need that for TDX anyway), i.e., not effectively just for TDX,
> if possible:
> 
> Currently the TDP MMU hooks are called depending on whether the page table
> type is private (or mirrored, whatever), but I think conceptually we should
> decide whether to call the TDP MMU hooks based on whether the faulting GPA
> is private _AND_ whether the hook is available.
> 
> https://lore.kernel.org/lkml/5e8119c0-31f5-4aa9-a496-4ae10bd745a3@intel.com/
> 
> If invoking SNP RMP commands is feasible in the TDP MMU hooks,

Feasible.  Yes.  Desirable?  No.  Either KVM tracks the state of the physical page
using the guest_memfd inode, or KVM _guarantees_ the NPT mappings _never_ get
dropped, including during intra-host migration.  E.g. to support intra-host
migration of TDX VMs, KVM is pretty much forced to transfer the S-EPT tables as-is,
which is ugly and painful (though performant).  We could do the same for NPT, but
there would need to be massive performance benefits to justify the complexity.

> then I think there's value in letting the SNP code use them too.  And we
> can simply split out one patch that only adds the TDP MMU hooks so SNP can
> land first.

Patch

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 5e72faca4e8f..10768f13b240 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -137,6 +137,7 @@ config KVM_AMD_SEV
 	depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
 	select ARCH_HAS_CC_PLATFORM
 	select KVM_GENERIC_PRIVATE_MEM
+	select HAVE_KVM_GMEM_PREPARE
 	help
 	  Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
 	  with Encrypted State (SEV-ES) on AMD processors.
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 69ec8f577763..0439ec12fa90 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -4557,3 +4557,101 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
 out_no_trace:
 	put_page(pfn_to_page(pfn));
 }
+
+static bool is_pfn_range_shared(kvm_pfn_t start, kvm_pfn_t end)
+{
+	kvm_pfn_t pfn = start;
+
+	while (pfn < end) {
+		int ret, rmp_level;
+		bool assigned;
+
+		ret = snp_lookup_rmpentry(pfn, &assigned, &rmp_level);
+		if (ret) {
+			pr_warn_ratelimited("SEV: Failed to retrieve RMP entry: PFN 0x%llx GFN start 0x%llx GFN end 0x%llx RMP level %d error %d\n",
+					    pfn, start, end, rmp_level, ret);
+			return false;
+		}
+
+		if (assigned) {
+			pr_debug("%s: overlap detected, PFN 0x%llx start 0x%llx end 0x%llx RMP level %d\n",
+				 __func__, pfn, start, end, rmp_level);
+			return false;
+		}
+
+		pfn++;
+	}
+
+	return true;
+}
+
+static u8 max_level_for_order(int order)
+{
+	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
+		return PG_LEVEL_2M;
+
+	return PG_LEVEL_4K;
+}
+
+static bool is_large_rmp_possible(struct kvm *kvm, kvm_pfn_t pfn, int order)
+{
+	kvm_pfn_t pfn_aligned = ALIGN_DOWN(pfn, PTRS_PER_PMD);
+
+	/*
+	 * If this is a large folio, and the entire 2M range containing the
+	 * PFN is currently shared, then the entire 2M-aligned range can be
+	 * set to private via a single 2M RMP entry.
+	 */
+	if (max_level_for_order(order) > PG_LEVEL_4K &&
+	    is_pfn_range_shared(pfn_aligned, pfn_aligned + PTRS_PER_PMD))
+		return true;
+
+	return false;
+}
+
+int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	kvm_pfn_t pfn_aligned;
+	gfn_t gfn_aligned;
+	int level, rc;
+	bool assigned;
+
+	if (!sev_snp_guest(kvm))
+		return 0;
+
+	rc = snp_lookup_rmpentry(pfn, &assigned, &level);
+	if (rc) {
+		pr_err_ratelimited("SEV: Failed to look up RMP entry: GFN %llx PFN %llx error %d\n",
+				   gfn, pfn, rc);
+		return -ENOENT;
+	}
+
+	if (assigned) {
+		pr_debug("%s: already assigned: gfn %llx pfn %llx max_order %d level %d\n",
+			 __func__, gfn, pfn, max_order, level);
+		return 0;
+	}
+
+	if (is_large_rmp_possible(kvm, pfn, max_order)) {
+		level = PG_LEVEL_2M;
+		pfn_aligned = ALIGN_DOWN(pfn, PTRS_PER_PMD);
+		gfn_aligned = ALIGN_DOWN(gfn, PTRS_PER_PMD);
+	} else {
+		level = PG_LEVEL_4K;
+		pfn_aligned = pfn;
+		gfn_aligned = gfn;
+	}
+
+	rc = rmp_make_private(pfn_aligned, gfn_to_gpa(gfn_aligned), level, sev->asid, false);
+	if (rc) {
+		pr_err_ratelimited("SEV: Failed to update RMP entry: GFN %llx PFN %llx level %d error %d\n",
+				   gfn, pfn, level, rc);
+		return -EINVAL;
+	}
+
+	pr_debug("%s: updated: gfn %llx pfn %llx pfn_aligned %llx max_order %d level %d\n",
+		 __func__, gfn, pfn, pfn_aligned, max_order, level);
+
+	return 0;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b70556608e8d..60783e9f2ae8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5085,6 +5085,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
 	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
+
+	.gmem_prepare = sev_gmem_prepare,
 };
 
 /*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 858e74a26fab..ff1aca7e10e9 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -736,6 +736,7 @@ extern unsigned int max_sev_asid;
 void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
 void sev_vcpu_unblocking(struct kvm_vcpu *vcpu);
 void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu);
+int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
 #else
 static inline struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) {
 	return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
@@ -752,6 +753,10 @@ static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXI
 static inline void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) {}
 static inline void sev_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 static inline void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) {}
+static inline int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
+{
+	return 0;
+}
 
 #endif
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b20f6c1b8214..0fb76ef9b7e9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13610,6 +13610,11 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
 
 #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+bool kvm_arch_gmem_prepare_needed(struct kvm *kvm)
+{
+	return kvm->arch.vm_type == KVM_X86_SNP_VM;
+}
+
 int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
 {
 	return static_call(kvm_x86_gmem_prepare)(kvm, pfn, gfn, max_order);
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index a44f983eb673..7d3932e5a689 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -46,8 +46,8 @@ static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct fol
 		gfn = slot->base_gfn + index - slot->gmem.pgoff;
 		rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page)));
 		if (rc) {
-			pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx, error %d.\n",
-					    index, rc);
+			pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
+					    index, gfn, pfn, rc);
 			return rc;
 		}
 	}