[v4,03/10] KVM: guest_memfd: Allow host to map guest_memfd() pages

Message ID: 20250218172500.807733-4-tabba@google.com
State: Not Applicable
Series: KVM: Mapping guest_memfd backed memory at the host for software protected VMs

Commit Message

Fuad Tabba Feb. 18, 2025, 5:24 p.m. UTC
Add support for mmap() and fault() for guest_memfd backed memory
in the host for VMs that support in-place conversion between
shared and private. To that end, this patch adds the ability to
check whether the VM type supports in-place conversion, and only
allows mapping its memory if that's the case.

This behavior is also gated by the configuration option
KVM_GMEM_SHARED_MEM.

Signed-off-by: Fuad Tabba <tabba@google.com>
---
 include/linux/kvm_host.h |  11 +++++
 virt/kvm/guest_memfd.c   | 103 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)
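
For illustration, here is a minimal userspace sketch of what this patch
enables. It is a sketch, not part of the series: the VM type below is an
assumption (any VM type whose arch code reports in-place conversion
support would do), and mmap() on the guest_memfd succeeds only for such
types, and only when KVM_GMEM_SHARED_MEM is enabled.

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	/* Assumed VM type; it must support shared guest_memfd memory. */
	int vm = ioctl(kvm, KVM_CREATE_VM, KVM_X86_SW_PROTECTED_VM);

	struct kvm_create_guest_memfd gmem = { .size = 0x400000 };
	int fd = ioctl(vm, KVM_CREATE_GUEST_MEMFD, &gmem);
	if (fd < 0) {
		perror("KVM_CREATE_GUEST_MEMFD");
		return 1;
	}

	/* MAP_SHARED is required; kvm_gmem_mmap() rejects anything else. */
	char *mem = mmap(NULL, gmem.size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED) {
		perror("mmap");	/* ENODEV for VM types without support */
		return 1;
	}

	memset(mem, 0, gmem.size);	/* each page faults via kvm_gmem_fault() */
	return 0;
}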

Comments

David Hildenbrand Feb. 20, 2025, 11:58 a.m. UTC | #1
On 18.02.25 18:24, Fuad Tabba wrote:
> [...]
> +
> +	if (!folio_test_uptodate(folio)) {
> +		clear_highpage(folio_page(folio, 0));
> +		kvm_gmem_mark_prepared(folio);
> +	}

kvm_gmem_get_pfn()->__kvm_gmem_get_pfn() seems to call 
kvm_gmem_prepare_folio() instead.

Could we do the same here?
Fuad Tabba Feb. 20, 2025, 12:04 p.m. UTC | #2
On Thu, 20 Feb 2025 at 11:58, David Hildenbrand <david@redhat.com> wrote:
>
> On 18.02.25 18:24, Fuad Tabba wrote:
> > [...]
> > +
> > +     if (!folio_test_uptodate(folio)) {
> > +             clear_highpage(folio_page(folio, 0));
> > +             kvm_gmem_mark_prepared(folio);
> > +     }
>
> kvm_gmem_get_pfn()->__kvm_gmem_get_pfn() seems to call
> kvm_gmem_prepare_folio() instead.
>
> Could we do the same here?

Will do.

Thanks,
/fuad

Fuad Tabba Feb. 20, 2025, 3:45 p.m. UTC | #3
Hi David,

On Thu, 20 Feb 2025 at 12:04, Fuad Tabba <tabba@google.com> wrote:
>
> On Thu, 20 Feb 2025 at 11:58, David Hildenbrand <david@redhat.com> wrote:
> >
> > On 18.02.25 18:24, Fuad Tabba wrote:
> > > [...]
> > > +
> > > +     if (!folio_test_uptodate(folio)) {
> > > +             clear_highpage(folio_page(folio, 0));
> > > +             kvm_gmem_mark_prepared(folio);
> > > +     }
> >
> > kvm_gmem_get_pfn()->__kvm_gmem_get_pfn() seems to call
> > kvm_gmem_prepare_folio() instead.
> >
> > Could we do the same here?
>
> Will do.

I realized it's not that straightforward. __kvm_gmem_prepare_folio()
requires the kvm_memory_slot, which is used to calculate the gfn. At
fault time we have neither the slot nor the gfn, and it's not just a
matter of plumbing them through: there might not be a slot associated
with that offset yet.

Cheers,
/fuad
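
To sketch the dependency Fuad describes (approximate, not the kernel's
exact code): a gfn exists only relative to a memslot binding, and
kvm_gmem_fault() may run before any such binding exists, so there is
nothing to prepare against.

static gfn_t gmem_index_to_gfn(struct kvm_memory_slot *slot, pgoff_t index)
{
	/* Inverse of the index math used when a gfn is resolved via a slot. */
	return slot->base_gfn + (index - slot->gmem.pgoff);
}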
David Hildenbrand Feb. 20, 2025, 3:58 p.m. UTC | #4
On 20.02.25 16:45, Fuad Tabba wrote:
> Hi David,
> 
> On Thu, 20 Feb 2025 at 12:04, Fuad Tabba <tabba@google.com> wrote:
>>
>> On Thu, 20 Feb 2025 at 11:58, David Hildenbrand <david@redhat.com> wrote:
>>>
>>> On 18.02.25 18:24, Fuad Tabba wrote:
>>>> [...]
>>>> +
>>>> +     if (!folio_test_uptodate(folio)) {
>>>> +             clear_highpage(folio_page(folio, 0));
>>>> +             kvm_gmem_mark_prepared(folio);
>>>> +     }
>>>
>>> kvm_gmem_get_pfn()->__kvm_gmem_get_pfn() seems to call
>>> kvm_gmem_prepare_folio() instead.
>>>
>>> Could we do the same here?
>>
>> Will do.
> 
> I realized it's not that straightforward. __kvm_gmem_prepare_folio()
> requires the kvm_memory_slot, which is used to calculate the gfn. At
> fault time we have neither the slot nor the gfn, and it's not just a
> matter of plumbing them through: there might not be a slot associated
> with that offset yet.

Hmm, right ... I wonder if that might be problematic. I assume no 
memslot == no memory attribute telling us if it is private or shared at 
least for now?

Once guest_memfd maintains that state, it might be "cleaner" ? What's 
your thought?
Fuad Tabba Feb. 20, 2025, 5:10 p.m. UTC | #5
On Thu, 20 Feb 2025 at 15:58, David Hildenbrand <david@redhat.com> wrote:
>
> On 20.02.25 16:45, Fuad Tabba wrote:
> > Hi David,
> >
> > On Thu, 20 Feb 2025 at 12:04, Fuad Tabba <tabba@google.com> wrote:
> >>
> >> On Thu, 20 Feb 2025 at 11:58, David Hildenbrand <david@redhat.com> wrote:
> >>>
> >>> On 18.02.25 18:24, Fuad Tabba wrote:
> >>>> [...]
> >>>> +
> >>>> +     if (!folio_test_uptodate(folio)) {
> >>>> +             clear_highpage(folio_page(folio, 0));
> >>>> +             kvm_gmem_mark_prepared(folio);
> >>>> +     }
> >>>
> >>> kvm_gmem_get_pfn()->__kvm_gmem_get_pfn() seems to call
> >>> kvm_gmem_prepare_folio() instead.
> >>>
> >>> Could we do the same here?
> >>
> >> Will do.
> >
> > I realized it's not that straightforward. __kvm_gmem_prepare_folio()
> > requires the kvm_memory_slot, which is used to calculate the gfn. At
> > fault time we have neither the slot nor the gfn, and it's not just a
> > matter of plumbing them through: there might not be a slot associated
> > with that offset yet.
>
> Hmm, right ... I wonder if that might be problematic. I assume no
> memslot == no memory attribute telling us if it is private or shared at
> least for now?
>
> Once guest_memfd maintains that state, it might be "cleaner" ? What's
> your thought?

The idea is that whether a page is shared or private isn't determined
by the VM's memory attributes, but by the new state added in the other
patch series. That state is independent of memslots and guest addresses
altogether.

One scenario you can imagine is the host wanting to fault in memory to
initialize it before associating it with a memslot. I guess we could
make it a requirement that you cannot fault in pages unless they are
associated with a memslot, but that might be too restrictive.

Cheers,
/fuad
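
A sketch of that scenario (gmem_fd, vm_fd, size and payload are assumed
from earlier setup; the ordering is the point):

	/* Populate guest_memfd-backed memory before any memslot exists. */
	char *mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			 gmem_fd, 0);
	memcpy(mem, payload, size);	/* faults in pages; no memslot yet */

	/* Only afterwards bind the range to a slot. */
	struct kvm_userspace_memory_region2 region = {
		.slot = 0,
		.flags = KVM_MEM_GUEST_MEMFD,
		.guest_phys_addr = 0,
		.memory_size = size,
		.userspace_addr = (unsigned long)mem,
		.guest_memfd = gmem_fd,
		.guest_memfd_offset = 0,
	};
	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);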

David Hildenbrand Feb. 20, 2025, 5:12 p.m. UTC | #6
On 20.02.25 18:10, Fuad Tabba wrote:
> On Thu, 20 Feb 2025 at 15:58, David Hildenbrand <david@redhat.com> wrote:
>>
>> On 20.02.25 16:45, Fuad Tabba wrote:
>>> Hi David,
>>>
>>> On Thu, 20 Feb 2025 at 12:04, Fuad Tabba <tabba@google.com> wrote:
>>>>
>>>> On Thu, 20 Feb 2025 at 11:58, David Hildenbrand <david@redhat.com> wrote:
>>>>>
>>>>> On 18.02.25 18:24, Fuad Tabba wrote:
>>>>>> [...]
>>>>>> +
>>>>>> +     if (!folio_test_uptodate(folio)) {
>>>>>> +             clear_highpage(folio_page(folio, 0));
>>>>>> +             kvm_gmem_mark_prepared(folio);
>>>>>> +     }
>>>>>
>>>>> kvm_gmem_get_pfn()->__kvm_gmem_get_pfn() seems to call
>>>>> kvm_gmem_prepare_folio() instead.
>>>>>
>>>>> Could we do the same here?
>>>>
>>>> Will do.
>>>
>>> I realized it's not that straightforward. __kvm_gmem_prepare_folio()
>>> requires the kvm_memory_slot, which is used to calculate the gfn. At
>>> fault time we have neither the slot nor the gfn, and it's not just a
>>> matter of plumbing them through: there might not be a slot associated
>>> with that offset yet.
>>
>> Hmm, right ... I wonder if that might be problematic. I assume no
>> memslot == no memory attribute telling us if it is private or shared at
>> least for now?
>>
>> Once guest_memfd maintains that state, it might be "cleaner" ? What's
>> your thought?
> 
> The idea is that whether a page is shared or private isn't determined
> by the VM's memory attributes, but by the new state added in the other
> patch series. That state is independent of memslots and guest addresses
> altogether.
> 
> One scenario you can imagine is the host wanting to fault in memory to
> initialize it before associating it with a memslot. I guess we could
> make it a requirement that you cannot fault in pages unless they are
> associated with a memslot, but that might be too restrictive.

Okay, just what I thought, thanks!

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3ad0719bfc4f..f9e8b10a4b09 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
 }
 #endif
 
+/*
+ * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
+ * private memory is enabled and it supports in-place shared/private conversion.
+ */
+#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
+static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
+{
+	return false;
+}
+#endif
+
 #ifndef kvm_arch_has_readonly_mem
 static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
 {
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index c6f6792bec2a..30b47ff0e6d2 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -317,9 +317,112 @@ void kvm_gmem_handle_folio_put(struct folio *folio)
 {
 	WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
 }
+
+static bool kvm_gmem_offset_is_shared(struct file *file, pgoff_t index)
+{
+	struct kvm_gmem *gmem = file->private_data;
+
+	/* For now, VMs that support shared memory share all their memory. */
+	return kvm_arch_gmem_supports_shared_mem(gmem->kvm);
+}
+
+static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vmf->vma->vm_file);
+	struct folio *folio;
+	vm_fault_t ret = VM_FAULT_LOCKED;
+
+	filemap_invalidate_lock_shared(inode->i_mapping);
+
+	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
+	if (IS_ERR(folio)) {
+		switch (PTR_ERR(folio)) {
+		case -EAGAIN:
+			ret = VM_FAULT_RETRY;
+			break;
+		case -ENOMEM:
+			ret = VM_FAULT_OOM;
+			break;
+		default:
+			ret = VM_FAULT_SIGBUS;
+			break;
+		}
+		goto out_filemap;
+	}
+
+	if (folio_test_hwpoison(folio)) {
+		ret = VM_FAULT_HWPOISON;
+		goto out_folio;
+	}
+
+	/* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
+	if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_folio;
+	}
+
+	/*
+	 * Only private folios are marked as "guestmem" so far, and we never
+	 * expect private folios at this point.
+	 */
+	if (WARN_ON_ONCE(folio_test_guestmem(folio))) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_folio;
+	}
+
+	/* No support for huge pages. */
+	if (WARN_ON_ONCE(folio_test_large(folio))) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_folio;
+	}
+
+	if (!folio_test_uptodate(folio)) {
+		clear_highpage(folio_page(folio, 0));
+		kvm_gmem_mark_prepared(folio);
+	}
+
+	vmf->page = folio_file_page(folio, vmf->pgoff);
+
+out_folio:
+	if (ret != VM_FAULT_LOCKED) {
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+
+out_filemap:
+	filemap_invalidate_unlock_shared(inode->i_mapping);
+
+	return ret;
+}
+
+static const struct vm_operations_struct kvm_gmem_vm_ops = {
+	.fault = kvm_gmem_fault,
+};
+
+static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kvm_gmem *gmem = file->private_data;
+
+	if (!kvm_arch_gmem_supports_shared_mem(gmem->kvm))
+		return -ENODEV;
+
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
+	    (VM_SHARED | VM_MAYSHARE)) {
+		return -EINVAL;
+	}
+
+	file_accessed(file);
+	vm_flags_set(vma, VM_DONTDUMP);
+	vma->vm_ops = &kvm_gmem_vm_ops;
+
+	return 0;
+}
+#else
+#define kvm_gmem_mmap NULL
 #endif /* CONFIG_KVM_GMEM_SHARED_MEM */
 
 static struct file_operations kvm_gmem_fops = {
+	.mmap		= kvm_gmem_mmap,
 	.open		= generic_file_open,
 	.release	= kvm_gmem_release,
 	.fallocate	= kvm_gmem_fallocate,
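
From kvm_gmem_mmap() above, two userspace-visible failure modes follow
directly; a small sketch (gmem_fd and unsupported_gmem_fd are assumed
file descriptors, errors as returned by the handler):

	/* No VM_SHARED|VM_MAYSHARE, e.g. MAP_PRIVATE: mmap() fails with EINVAL. */
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE,
		       gmem_fd, 0);
	assert(p == MAP_FAILED && errno == EINVAL);

	/* guest_memfd of a VM type without shared-memory support: ENODEV. */
	p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		 unsupported_gmem_fd, 0);
	assert(p == MAP_FAILED && errno == ENODEV);

Note that when KVM_GMEM_SHARED_MEM is not configured, .mmap is NULL and
the mmap() syscall itself fails with ENODEV, so userspace sees the same
error either way.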