diff mbox series

[v3,02/11] KVM: guest_memfd: Handle final folio_put() of guest_memfd pages

Message ID 20250211121128.703390-3-tabba@google.com (mailing list archive)
State Superseded
Headers show
Series KVM: Mapping guest_memfd backed memory at the host for software protected VMs | expand

Commit Message

Fuad Tabba Feb. 11, 2025, 12:11 p.m. UTC
Before transitioning a guest_memfd folio to unshared, thereby
disallowing access by the host and allowing the hypervisor to
transition its view of the guest page as private, we need to be
sure that the host doesn't have any references to the folio.

This patch introduces a new type for guest_memfd folios, which
isn't activated in this series but is here as a placeholder and
to facilitate the code in the next patch. This will be used in
the future to register a callback that informs the guest_memfd
subsystem when the last reference is dropped, therefore knowing
that the host doesn't have any remaining references.

Signed-off-by: Fuad Tabba <tabba@google.com>
---
 include/linux/kvm_host.h   |  9 +++++++++
 include/linux/page-flags.h | 17 +++++++++++++++++
 mm/debug.c                 |  1 +
 mm/swap.c                  |  9 +++++++++
 virt/kvm/guest_memfd.c     |  7 +++++++
 5 files changed, 43 insertions(+)

Comments

Peter Xu Feb. 12, 2025, 6:19 p.m. UTC | #1
On Tue, Feb 11, 2025 at 12:11:18PM +0000, Fuad Tabba wrote:
> Before transitioning a guest_memfd folio to unshared, thereby
> disallowing access by the host and allowing the hypervisor to
> transition its view of the guest page as private, we need to be
> sure that the host doesn't have any references to the folio.
> 
> This patch introduces a new type for guest_memfd folios, which
> isn't activated in this series but is here as a placeholder and
> to facilitate the code in the next patch. This will be used in
> the future to register a callback that informs the guest_memfd
> subsystem when the last reference is dropped, therefore knowing
> that the host doesn't have any remaining references.
> 
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  include/linux/kvm_host.h   |  9 +++++++++
>  include/linux/page-flags.h | 17 +++++++++++++++++
>  mm/debug.c                 |  1 +
>  mm/swap.c                  |  9 +++++++++
>  virt/kvm/guest_memfd.c     |  7 +++++++
>  5 files changed, 43 insertions(+)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f34f4cfaa513..8b5f28f6efff 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2571,4 +2571,13 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
>  				    struct kvm_pre_fault_memory *range);
>  #endif
>  
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +void kvm_gmem_handle_folio_put(struct folio *folio);
> +#else
> +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
> +{
> +	WARN_ON_ONCE(1);
> +}
> +#endif
> +
>  #endif
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 6dc2494bd002..734afda268ab 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -933,6 +933,17 @@ enum pagetype {
>  	PGTY_slab	= 0xf5,
>  	PGTY_zsmalloc	= 0xf6,
>  	PGTY_unaccepted	= 0xf7,
> +	/*
> +	 * guestmem folios are used to back VM memory as managed by guest_memfd.
> +	 * Once the last reference is put, instead of freeing these folios back
> +	 * to the page allocator, they are returned to guest_memfd.
> +	 *
> +	 * For now, guestmem will only be set on these folios as long as they
> +	 * cannot be mapped to user space ("private state"), with the plan of
> +	 * always setting that type once typed folios can be mapped to user
> +	 * space cleanly.

Does it imply that gmem folios can be mapped to userspace at some point?
It'll be great if you can share more about it, since as of now it looks
like anything has a page type cannot use the per-page mapcount.

When looking at this, I also found that __folio_rmap_sanity_checks() has
some folio_test_hugetlb() tests, not sure whether they're prone to be
changed too e.g. to cover all pages that have a type, so as to cover gmem.

For the longer term, it'll be definitely nice if gmem folios can be
mapcounted just like normal file folios.  It can enable gmem as a backstore
just like what normal memfd would do, with gmem managing the folios.

> +	 */
> +	PGTY_guestmem	= 0xf8,
>  
>  	PGTY_mapcount_underflow = 0xff
>  };
> @@ -1082,6 +1093,12 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
>  FOLIO_TEST_FLAG_FALSE(hugetlb)
>  #endif
>  
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM

This seems to only be defined in follow up patches.. so may need some
adjustments.

> +FOLIO_TYPE_OPS(guestmem, guestmem)
> +#else
> +FOLIO_TEST_FLAG_FALSE(guestmem)
> +#endif
> +
>  PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
>  
>  /*
> diff --git a/mm/debug.c b/mm/debug.c
> index 8d2acf432385..08bc42c6cba8 100644
> --- a/mm/debug.c
> +++ b/mm/debug.c
> @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
>  	DEF_PAGETYPE_NAME(table),
>  	DEF_PAGETYPE_NAME(buddy),
>  	DEF_PAGETYPE_NAME(unaccepted),
> +	DEF_PAGETYPE_NAME(guestmem),
>  };
>  
>  static const char *page_type_name(unsigned int page_type)
> diff --git a/mm/swap.c b/mm/swap.c
> index 47bc1bb919cc..241880a46358 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -38,6 +38,10 @@
>  #include <linux/local_lock.h>
>  #include <linux/buffer_head.h>
>  
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +#include <linux/kvm_host.h>
> +#endif
> +
>  #include "internal.h"
>  
>  #define CREATE_TRACE_POINTS
> @@ -101,6 +105,11 @@ static void free_typed_folio(struct folio *folio)
>  	case PGTY_hugetlb:
>  		free_huge_folio(folio);
>  		return;
> +#endif
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +	case PGTY_guestmem:
> +		kvm_gmem_handle_folio_put(folio);
> +		return;
>  #endif
>  	default:
>  		WARN_ON_ONCE(1);
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index b2aa6bf24d3a..c6f6792bec2a 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -312,6 +312,13 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
>  	return gfn - slot->base_gfn + slot->gmem.pgoff;
>  }
>  
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +void kvm_gmem_handle_folio_put(struct folio *folio)
> +{
> +	WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
> +}
> +#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
> +
>  static struct file_operations kvm_gmem_fops = {
>  	.open		= generic_file_open,
>  	.release	= kvm_gmem_release,
> -- 
> 2.48.1.502.g6dc24dfdaf-goog
> 
>
Fuad Tabba Feb. 13, 2025, 8:29 a.m. UTC | #2
Hi Peter,

On Wed, 12 Feb 2025 at 18:19, Peter Xu <peterx@redhat.com> wrote:
>
> On Tue, Feb 11, 2025 at 12:11:18PM +0000, Fuad Tabba wrote:
> > Before transitioning a guest_memfd folio to unshared, thereby
> > disallowing access by the host and allowing the hypervisor to
> > transition its view of the guest page as private, we need to be
> > sure that the host doesn't have any references to the folio.
> >
> > This patch introduces a new type for guest_memfd folios, which
> > isn't activated in this series but is here as a placeholder and
> > to facilitate the code in the next patch. This will be used in
> > the future to register a callback that informs the guest_memfd
> > subsystem when the last reference is dropped, therefore knowing
> > that the host doesn't have any remaining references.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  include/linux/kvm_host.h   |  9 +++++++++
> >  include/linux/page-flags.h | 17 +++++++++++++++++
> >  mm/debug.c                 |  1 +
> >  mm/swap.c                  |  9 +++++++++
> >  virt/kvm/guest_memfd.c     |  7 +++++++
> >  5 files changed, 43 insertions(+)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index f34f4cfaa513..8b5f28f6efff 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -2571,4 +2571,13 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
> >                                   struct kvm_pre_fault_memory *range);
> >  #endif
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +void kvm_gmem_handle_folio_put(struct folio *folio);
> > +#else
> > +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
> > +{
> > +     WARN_ON_ONCE(1);
> > +}
> > +#endif
> > +
> >  #endif
> > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> > index 6dc2494bd002..734afda268ab 100644
> > --- a/include/linux/page-flags.h
> > +++ b/include/linux/page-flags.h
> > @@ -933,6 +933,17 @@ enum pagetype {
> >       PGTY_slab       = 0xf5,
> >       PGTY_zsmalloc   = 0xf6,
> >       PGTY_unaccepted = 0xf7,
> > +     /*
> > +      * guestmem folios are used to back VM memory as managed by guest_memfd.
> > +      * Once the last reference is put, instead of freeing these folios back
> > +      * to the page allocator, they are returned to guest_memfd.
> > +      *
> > +      * For now, guestmem will only be set on these folios as long as they
> > +      * cannot be mapped to user space ("private state"), with the plan of
> > +      * always setting that type once typed folios can be mapped to user
> > +      * space cleanly.
>
> Does it imply that gmem folios can be mapped to userspace at some point?
> It'll be great if you can share more about it, since as of now it looks
> like anything has a page type cannot use the per-page mapcount.

This is the goal of this series. By the end of this series, you can
map gmem folios, as long as they belong to a VM type that allows it.
My other series, which will be rebased on this one, adds the
distinction of memory shared with the host vs memory private to the
guest:

https://lore.kernel.org/all/20250117163001.2326672-1-tabba@google.com/

That series deals with the mapcount issue, by only applying the type
once the mapcount is 0. We talked about this in the guest_memfd mm
sync, David Hildenbrand mentioned ongoing work to remove the
overlaying of the type with the memcount. That should solve the
problem completely.


> When looking at this, I also found that __folio_rmap_sanity_checks() has
> some folio_test_hugetlb() tests, not sure whether they're prone to be
> changed too e.g. to cover all pages that have a type, so as to cover gmem.
>
> For the longer term, it'll be definitely nice if gmem folios can be
> mapcounted just like normal file folios.  It can enable gmem as a backstore
> just like what normal memfd would do, with gmem managing the folios.

That's the plan, I agree.

> > +      */
> > +     PGTY_guestmem   = 0xf8,
> >
> >       PGTY_mapcount_underflow = 0xff
> >  };
> > @@ -1082,6 +1093,12 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
> >  FOLIO_TEST_FLAG_FALSE(hugetlb)
> >  #endif
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
>
> This seems to only be defined in follow up patches.. so may need some
> adjustments.

It's a configuration option. If you like, I could bring forward the
patch that adds it to the kconfig file.

Thank you,
/fuad

> > +FOLIO_TYPE_OPS(guestmem, guestmem)
> > +#else
> > +FOLIO_TEST_FLAG_FALSE(guestmem)
> > +#endif
> > +
> >  PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
> >
> >  /*
> > diff --git a/mm/debug.c b/mm/debug.c
> > index 8d2acf432385..08bc42c6cba8 100644
> > --- a/mm/debug.c
> > +++ b/mm/debug.c
> > @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
> >       DEF_PAGETYPE_NAME(table),
> >       DEF_PAGETYPE_NAME(buddy),
> >       DEF_PAGETYPE_NAME(unaccepted),
> > +     DEF_PAGETYPE_NAME(guestmem),
> >  };
> >
> >  static const char *page_type_name(unsigned int page_type)
> > diff --git a/mm/swap.c b/mm/swap.c
> > index 47bc1bb919cc..241880a46358 100644
> > --- a/mm/swap.c
> > +++ b/mm/swap.c
> > @@ -38,6 +38,10 @@
> >  #include <linux/local_lock.h>
> >  #include <linux/buffer_head.h>
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +#include <linux/kvm_host.h>
> > +#endif
> > +
> >  #include "internal.h"
> >
> >  #define CREATE_TRACE_POINTS
> > @@ -101,6 +105,11 @@ static void free_typed_folio(struct folio *folio)
> >       case PGTY_hugetlb:
> >               free_huge_folio(folio);
> >               return;
> > +#endif
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +     case PGTY_guestmem:
> > +             kvm_gmem_handle_folio_put(folio);
> > +             return;
> >  #endif
> >       default:
> >               WARN_ON_ONCE(1);
> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> > index b2aa6bf24d3a..c6f6792bec2a 100644
> > --- a/virt/kvm/guest_memfd.c
> > +++ b/virt/kvm/guest_memfd.c
> > @@ -312,6 +312,13 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> >       return gfn - slot->base_gfn + slot->gmem.pgoff;
> >  }
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +void kvm_gmem_handle_folio_put(struct folio *folio)
> > +{
> > +     WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
> > +}
> > +#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
> > +
> >  static struct file_operations kvm_gmem_fops = {
> >       .open           = generic_file_open,
> >       .release        = kvm_gmem_release,
> > --
> > 2.48.1.502.g6dc24dfdaf-goog
> >
> >
>
> --
> Peter Xu
>
Vlastimil Babka Feb. 17, 2025, 9:49 a.m. UTC | #3
On 2/11/25 13:11, Fuad Tabba wrote:
> Before transitioning a guest_memfd folio to unshared, thereby
> disallowing access by the host and allowing the hypervisor to
> transition its view of the guest page as private, we need to be
> sure that the host doesn't have any references to the folio.
> 
> This patch introduces a new type for guest_memfd folios, which
> isn't activated in this series but is here as a placeholder and
> to facilitate the code in the next patch. This will be used in

It's not clear to me how the code in the next page is facilitated as it
doesn't use any of this?

> the future to register a callback that informs the guest_memfd
> subsystem when the last reference is dropped, therefore knowing
> that the host doesn't have any remaining references.
> 
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  include/linux/kvm_host.h   |  9 +++++++++
>  include/linux/page-flags.h | 17 +++++++++++++++++
>  mm/debug.c                 |  1 +
>  mm/swap.c                  |  9 +++++++++
>  virt/kvm/guest_memfd.c     |  7 +++++++
>  5 files changed, 43 insertions(+)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f34f4cfaa513..8b5f28f6efff 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2571,4 +2571,13 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
>  				    struct kvm_pre_fault_memory *range);
>  #endif
>  
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +void kvm_gmem_handle_folio_put(struct folio *folio);
> +#else
> +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
> +{
> +	WARN_ON_ONCE(1);
> +}

Since the caller is guarded by CONFIG_KVM_GMEM_SHARED_MEM, do we need the
CONFIG_KVM_GMEM_SHARED_MEM=n variant at all?

> +#endif
> +
>  #endif
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 6dc2494bd002..734afda268ab 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -933,6 +933,17 @@ enum pagetype {
>  	PGTY_slab	= 0xf5,
>  	PGTY_zsmalloc	= 0xf6,
>  	PGTY_unaccepted	= 0xf7,
> +	/*
> +	 * guestmem folios are used to back VM memory as managed by guest_memfd.
> +	 * Once the last reference is put, instead of freeing these folios back
> +	 * to the page allocator, they are returned to guest_memfd.
> +	 *
> +	 * For now, guestmem will only be set on these folios as long as they
> +	 * cannot be mapped to user space ("private state"), with the plan of

Might be a bit misleading as I don't think it's set yet as of this series.
But I guess we can keep it to avoid another update later.

> +	 * always setting that type once typed folios can be mapped to user
> +	 * space cleanly.
> +	 */
> +	PGTY_guestmem	= 0xf8,
>  
>  	PGTY_mapcount_underflow = 0xff
>  };
> @@ -1082,6 +1093,12 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
>  FOLIO_TEST_FLAG_FALSE(hugetlb)
>  #endif
>  
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +FOLIO_TYPE_OPS(guestmem, guestmem)
> +#else
> +FOLIO_TEST_FLAG_FALSE(guestmem)
> +#endif
> +
>  PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
>  
>  /*
> diff --git a/mm/debug.c b/mm/debug.c
> index 8d2acf432385..08bc42c6cba8 100644
> --- a/mm/debug.c
> +++ b/mm/debug.c
> @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
>  	DEF_PAGETYPE_NAME(table),
>  	DEF_PAGETYPE_NAME(buddy),
>  	DEF_PAGETYPE_NAME(unaccepted),
> +	DEF_PAGETYPE_NAME(guestmem),
>  };
>  
>  static const char *page_type_name(unsigned int page_type)
> diff --git a/mm/swap.c b/mm/swap.c
> index 47bc1bb919cc..241880a46358 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -38,6 +38,10 @@
>  #include <linux/local_lock.h>
>  #include <linux/buffer_head.h>
>  
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +#include <linux/kvm_host.h>
> +#endif

Do we need to guard the include?

> +
>  #include "internal.h"
>  
>  #define CREATE_TRACE_POINTS
> @@ -101,6 +105,11 @@ static void free_typed_folio(struct folio *folio)
>  	case PGTY_hugetlb:
>  		free_huge_folio(folio);
>  		return;
> +#endif
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +	case PGTY_guestmem:
> +		kvm_gmem_handle_folio_put(folio);
> +		return;
>  #endif
>  	default:
>  		WARN_ON_ONCE(1);
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index b2aa6bf24d3a..c6f6792bec2a 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -312,6 +312,13 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
>  	return gfn - slot->base_gfn + slot->gmem.pgoff;
>  }
>  
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +void kvm_gmem_handle_folio_put(struct folio *folio)
> +{
> +	WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
> +}
> +#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
> +
>  static struct file_operations kvm_gmem_fops = {
>  	.open		= generic_file_open,
>  	.release	= kvm_gmem_release,
Fuad Tabba Feb. 17, 2025, 10:12 a.m. UTC | #4
Hi Vlastimil,

On Mon, 17 Feb 2025 at 09:49, Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 2/11/25 13:11, Fuad Tabba wrote:
> > Before transitioning a guest_memfd folio to unshared, thereby
> > disallowing access by the host and allowing the hypervisor to
> > transition its view of the guest page as private, we need to be
> > sure that the host doesn't have any references to the folio.
> >
> > This patch introduces a new type for guest_memfd folios, which
> > isn't activated in this series but is here as a placeholder and
> > to facilitate the code in the next patch. This will be used in
>
> It's not clear to me how the code in the next page is facilitated as it
> doesn't use any of this?

I'm sorry about that, I'm missing the word "series". i.e.,

> > This patch introduces a new type for guest_memfd folios, which
> > isn't activated in this series but is here as a placeholder and
> > to facilitate the code in the next patch *series*.

I'm referring to this series:
https://lore.kernel.org/all/20250117163001.2326672-1-tabba@google.com/

> > the future to register a callback that informs the guest_memfd
> > subsystem when the last reference is dropped, therefore knowing
> > that the host doesn't have any remaining references.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  include/linux/kvm_host.h   |  9 +++++++++
> >  include/linux/page-flags.h | 17 +++++++++++++++++
> >  mm/debug.c                 |  1 +
> >  mm/swap.c                  |  9 +++++++++
> >  virt/kvm/guest_memfd.c     |  7 +++++++
> >  5 files changed, 43 insertions(+)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index f34f4cfaa513..8b5f28f6efff 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -2571,4 +2571,13 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
> >                                   struct kvm_pre_fault_memory *range);
> >  #endif
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +void kvm_gmem_handle_folio_put(struct folio *folio);
> > +#else
> > +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
> > +{
> > +     WARN_ON_ONCE(1);
> > +}
>
> Since the caller is guarded by CONFIG_KVM_GMEM_SHARED_MEM, do we need the
> CONFIG_KVM_GMEM_SHARED_MEM=n variant at all?

No. I'll remove it.

> > +#endif
> > +
> >  #endif
> > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> > index 6dc2494bd002..734afda268ab 100644
> > --- a/include/linux/page-flags.h
> > +++ b/include/linux/page-flags.h
> > @@ -933,6 +933,17 @@ enum pagetype {
> >       PGTY_slab       = 0xf5,
> >       PGTY_zsmalloc   = 0xf6,
> >       PGTY_unaccepted = 0xf7,
> > +     /*
> > +      * guestmem folios are used to back VM memory as managed by guest_memfd.
> > +      * Once the last reference is put, instead of freeing these folios back
> > +      * to the page allocator, they are returned to guest_memfd.
> > +      *
> > +      * For now, guestmem will only be set on these folios as long as they
> > +      * cannot be mapped to user space ("private state"), with the plan of
>
> Might be a bit misleading as I don't think it's set yet as of this series.
> But I guess we can keep it to avoid another update later.

You're right, it's not in this series. But as you said, the idea is to
have the least amount of churn in the core mm code.

> > +      * always setting that type once typed folios can be mapped to user
> > +      * space cleanly.
> > +      */
> > +     PGTY_guestmem   = 0xf8,
> >
> >       PGTY_mapcount_underflow = 0xff
> >  };
> > @@ -1082,6 +1093,12 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
> >  FOLIO_TEST_FLAG_FALSE(hugetlb)
> >  #endif
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +FOLIO_TYPE_OPS(guestmem, guestmem)
> > +#else
> > +FOLIO_TEST_FLAG_FALSE(guestmem)
> > +#endif
> > +
> >  PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
> >
> >  /*
> > diff --git a/mm/debug.c b/mm/debug.c
> > index 8d2acf432385..08bc42c6cba8 100644
> > --- a/mm/debug.c
> > +++ b/mm/debug.c
> > @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
> >       DEF_PAGETYPE_NAME(table),
> >       DEF_PAGETYPE_NAME(buddy),
> >       DEF_PAGETYPE_NAME(unaccepted),
> > +     DEF_PAGETYPE_NAME(guestmem),
> >  };
> >
> >  static const char *page_type_name(unsigned int page_type)
> > diff --git a/mm/swap.c b/mm/swap.c
> > index 47bc1bb919cc..241880a46358 100644
> > --- a/mm/swap.c
> > +++ b/mm/swap.c
> > @@ -38,6 +38,10 @@
> >  #include <linux/local_lock.h>
> >  #include <linux/buffer_head.h>
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +#include <linux/kvm_host.h>
> > +#endif
>
> Do we need to guard the include?

Yes, otherwise allnoconfig complains due to many of the x86 things it
drags along if included but KVM isn't configured. I could put it in a
different header that doesn't have this problem, but I couldn't think
of a better header for this.

Thank you,
/fuad

> > +
> >  #include "internal.h"
> >
> >  #define CREATE_TRACE_POINTS
> > @@ -101,6 +105,11 @@ static void free_typed_folio(struct folio *folio)
> >       case PGTY_hugetlb:
> >               free_huge_folio(folio);
> >               return;
> > +#endif
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +     case PGTY_guestmem:
> > +             kvm_gmem_handle_folio_put(folio);
> > +             return;
> >  #endif
> >       default:
> >               WARN_ON_ONCE(1);
> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> > index b2aa6bf24d3a..c6f6792bec2a 100644
> > --- a/virt/kvm/guest_memfd.c
> > +++ b/virt/kvm/guest_memfd.c
> > @@ -312,6 +312,13 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> >       return gfn - slot->base_gfn + slot->gmem.pgoff;
> >  }
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +void kvm_gmem_handle_folio_put(struct folio *folio)
> > +{
> > +     WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
> > +}
> > +#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
> > +
> >  static struct file_operations kvm_gmem_fops = {
> >       .open           = generic_file_open,
> >       .release        = kvm_gmem_release,
>
Vlastimil Babka Feb. 17, 2025, 11:21 a.m. UTC | #5
On 2/17/25 11:12, Fuad Tabba wrote:
> Hi Vlastimil,
> 
> On Mon, 17 Feb 2025 at 09:49, Vlastimil Babka <vbabka@suse.cz> wrote:
>>
>> On 2/11/25 13:11, Fuad Tabba wrote:
>> > Before transitioning a guest_memfd folio to unshared, thereby
>> > disallowing access by the host and allowing the hypervisor to
>> > transition its view of the guest page as private, we need to be
>> > sure that the host doesn't have any references to the folio.
>> >
>> > This patch introduces a new type for guest_memfd folios, which
>> > isn't activated in this series but is here as a placeholder and
>> > to facilitate the code in the next patch. This will be used in
>>
>> It's not clear to me how the code in the next page is facilitated as it
>> doesn't use any of this?
> 
> I'm sorry about that, I'm missing the word "series". i.e.,
> 
>> > This patch introduces a new type for guest_memfd folios, which
>> > isn't activated in this series but is here as a placeholder and
>> > to facilitate the code in the next patch *series*.
> 
> I'm referring to this series:
> https://lore.kernel.org/all/20250117163001.2326672-1-tabba@google.com/
> 
>> > the future to register a callback that informs the guest_memfd
>> > subsystem when the last reference is dropped, therefore knowing
>> > that the host doesn't have any remaining references.
>> >
>> > Signed-off-by: Fuad Tabba <tabba@google.com>
>> > ---
>> >  include/linux/kvm_host.h   |  9 +++++++++
>> >  include/linux/page-flags.h | 17 +++++++++++++++++
>> >  mm/debug.c                 |  1 +
>> >  mm/swap.c                  |  9 +++++++++
>> >  virt/kvm/guest_memfd.c     |  7 +++++++
>> >  5 files changed, 43 insertions(+)
>> >
>> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> > index f34f4cfaa513..8b5f28f6efff 100644
>> > --- a/include/linux/kvm_host.h
>> > +++ b/include/linux/kvm_host.h
>> > @@ -2571,4 +2571,13 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
>> >                                   struct kvm_pre_fault_memory *range);
>> >  #endif
>> >
>> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
>> > +void kvm_gmem_handle_folio_put(struct folio *folio);
>> > +#else
>> > +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
>> > +{
>> > +     WARN_ON_ONCE(1);
>> > +}
>>
>> Since the caller is guarded by CONFIG_KVM_GMEM_SHARED_MEM, do we need the
>> CONFIG_KVM_GMEM_SHARED_MEM=n variant at all?
> 
> No. I'll remove it.
> 
>> > +#endif
>> > +
>> >  #endif
>> > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
>> > index 6dc2494bd002..734afda268ab 100644
>> > --- a/include/linux/page-flags.h
>> > +++ b/include/linux/page-flags.h
>> > @@ -933,6 +933,17 @@ enum pagetype {
>> >       PGTY_slab       = 0xf5,
>> >       PGTY_zsmalloc   = 0xf6,
>> >       PGTY_unaccepted = 0xf7,
>> > +     /*
>> > +      * guestmem folios are used to back VM memory as managed by guest_memfd.
>> > +      * Once the last reference is put, instead of freeing these folios back
>> > +      * to the page allocator, they are returned to guest_memfd.
>> > +      *
>> > +      * For now, guestmem will only be set on these folios as long as they
>> > +      * cannot be mapped to user space ("private state"), with the plan of
>>
>> Might be a bit misleading as I don't think it's set yet as of this series.
>> But I guess we can keep it to avoid another update later.
> 
> You're right, it's not in this series. But as you said, the idea is to
> have the least amount of churn in the core mm code.
> 
>> > +      * always setting that type once typed folios can be mapped to user
>> > +      * space cleanly.
>> > +      */
>> > +     PGTY_guestmem   = 0xf8,
>> >
>> >       PGTY_mapcount_underflow = 0xff
>> >  };
>> > @@ -1082,6 +1093,12 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
>> >  FOLIO_TEST_FLAG_FALSE(hugetlb)
>> >  #endif
>> >
>> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
>> > +FOLIO_TYPE_OPS(guestmem, guestmem)
>> > +#else
>> > +FOLIO_TEST_FLAG_FALSE(guestmem)
>> > +#endif
>> > +
>> >  PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
>> >
>> >  /*
>> > diff --git a/mm/debug.c b/mm/debug.c
>> > index 8d2acf432385..08bc42c6cba8 100644
>> > --- a/mm/debug.c
>> > +++ b/mm/debug.c
>> > @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
>> >       DEF_PAGETYPE_NAME(table),
>> >       DEF_PAGETYPE_NAME(buddy),
>> >       DEF_PAGETYPE_NAME(unaccepted),
>> > +     DEF_PAGETYPE_NAME(guestmem),
>> >  };
>> >
>> >  static const char *page_type_name(unsigned int page_type)
>> > diff --git a/mm/swap.c b/mm/swap.c
>> > index 47bc1bb919cc..241880a46358 100644
>> > --- a/mm/swap.c
>> > +++ b/mm/swap.c
>> > @@ -38,6 +38,10 @@
>> >  #include <linux/local_lock.h>
>> >  #include <linux/buffer_head.h>
>> >
>> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
>> > +#include <linux/kvm_host.h>
>> > +#endif
>>
>> Do we need to guard the include?
> 
> Yes, otherwise allnoconfig complains due to many of the x86 things it
> drags along if included but KVM isn't configured. I could put it in a
> different header that doesn't have this problem, but I couldn't think
> of a better header for this.

Ok, you can add
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Fuad Tabba Feb. 17, 2025, 11:21 a.m. UTC | #6
On Mon, 17 Feb 2025 at 11:21, Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 2/17/25 11:12, Fuad Tabba wrote:
> > Hi Vlastimil,
> >
> > On Mon, 17 Feb 2025 at 09:49, Vlastimil Babka <vbabka@suse.cz> wrote:
> >>
> >> On 2/11/25 13:11, Fuad Tabba wrote:
> >> > Before transitioning a guest_memfd folio to unshared, thereby
> >> > disallowing access by the host and allowing the hypervisor to
> >> > transition its view of the guest page as private, we need to be
> >> > sure that the host doesn't have any references to the folio.
> >> >
> >> > This patch introduces a new type for guest_memfd folios, which
> >> > isn't activated in this series but is here as a placeholder and
> >> > to facilitate the code in the next patch. This will be used in
> >>
> >> It's not clear to me how the code in the next page is facilitated as it
> >> doesn't use any of this?
> >
> > I'm sorry about that, I'm missing the word "series". i.e.,
> >
> >> > This patch introduces a new type for guest_memfd folios, which
> >> > isn't activated in this series but is here as a placeholder and
> >> > to facilitate the code in the next patch *series*.
> >
> > I'm referring to this series:
> > https://lore.kernel.org/all/20250117163001.2326672-1-tabba@google.com/
> >
> >> > the future to register a callback that informs the guest_memfd
> >> > subsystem when the last reference is dropped, therefore knowing
> >> > that the host doesn't have any remaining references.
> >> >
> >> > Signed-off-by: Fuad Tabba <tabba@google.com>
> >> > ---
> >> >  include/linux/kvm_host.h   |  9 +++++++++
> >> >  include/linux/page-flags.h | 17 +++++++++++++++++
> >> >  mm/debug.c                 |  1 +
> >> >  mm/swap.c                  |  9 +++++++++
> >> >  virt/kvm/guest_memfd.c     |  7 +++++++
> >> >  5 files changed, 43 insertions(+)
> >> >
> >> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >> > index f34f4cfaa513..8b5f28f6efff 100644
> >> > --- a/include/linux/kvm_host.h
> >> > +++ b/include/linux/kvm_host.h
> >> > @@ -2571,4 +2571,13 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
> >> >                                   struct kvm_pre_fault_memory *range);
> >> >  #endif
> >> >
> >> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> >> > +void kvm_gmem_handle_folio_put(struct folio *folio);
> >> > +#else
> >> > +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
> >> > +{
> >> > +     WARN_ON_ONCE(1);
> >> > +}
> >>
> >> Since the caller is guarded by CONFIG_KVM_GMEM_SHARED_MEM, do we need the
> >> CONFIG_KVM_GMEM_SHARED_MEM=n variant at all?
> >
> > No. I'll remove it.
> >
> >> > +#endif
> >> > +
> >> >  #endif
> >> > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> >> > index 6dc2494bd002..734afda268ab 100644
> >> > --- a/include/linux/page-flags.h
> >> > +++ b/include/linux/page-flags.h
> >> > @@ -933,6 +933,17 @@ enum pagetype {
> >> >       PGTY_slab       = 0xf5,
> >> >       PGTY_zsmalloc   = 0xf6,
> >> >       PGTY_unaccepted = 0xf7,
> >> > +     /*
> >> > +      * guestmem folios are used to back VM memory as managed by guest_memfd.
> >> > +      * Once the last reference is put, instead of freeing these folios back
> >> > +      * to the page allocator, they are returned to guest_memfd.
> >> > +      *
> >> > +      * For now, guestmem will only be set on these folios as long as they
> >> > +      * cannot be mapped to user space ("private state"), with the plan of
> >>
> >> Might be a bit misleading as I don't think it's set yet as of this series.
> >> But I guess we can keep it to avoid another update later.
> >
> > You're right, it's not in this series. But as you said, the idea is to
> > have the least amount of churn in the core mm code.
> >
> >> > +      * always setting that type once typed folios can be mapped to user
> >> > +      * space cleanly.
> >> > +      */
> >> > +     PGTY_guestmem   = 0xf8,
> >> >
> >> >       PGTY_mapcount_underflow = 0xff
> >> >  };
> >> > @@ -1082,6 +1093,12 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
> >> >  FOLIO_TEST_FLAG_FALSE(hugetlb)
> >> >  #endif
> >> >
> >> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> >> > +FOLIO_TYPE_OPS(guestmem, guestmem)
> >> > +#else
> >> > +FOLIO_TEST_FLAG_FALSE(guestmem)
> >> > +#endif
> >> > +
> >> >  PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
> >> >
> >> >  /*
> >> > diff --git a/mm/debug.c b/mm/debug.c
> >> > index 8d2acf432385..08bc42c6cba8 100644
> >> > --- a/mm/debug.c
> >> > +++ b/mm/debug.c
> >> > @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
> >> >       DEF_PAGETYPE_NAME(table),
> >> >       DEF_PAGETYPE_NAME(buddy),
> >> >       DEF_PAGETYPE_NAME(unaccepted),
> >> > +     DEF_PAGETYPE_NAME(guestmem),
> >> >  };
> >> >
> >> >  static const char *page_type_name(unsigned int page_type)
> >> > diff --git a/mm/swap.c b/mm/swap.c
> >> > index 47bc1bb919cc..241880a46358 100644
> >> > --- a/mm/swap.c
> >> > +++ b/mm/swap.c
> >> > @@ -38,6 +38,10 @@
> >> >  #include <linux/local_lock.h>
> >> >  #include <linux/buffer_head.h>
> >> >
> >> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> >> > +#include <linux/kvm_host.h>
> >> > +#endif
> >>
> >> Do we need to guard the include?
> >
> > Yes, otherwise allnoconfig complains due to many of the x86 things it
> > drags along if included but KVM isn't configured. I could put it in a
> > different header that doesn't have this problem, but I couldn't think
> > of a better header for this.
>
> Ok, you can add
> Acked-by: Vlastimil Babka <vbabka@suse.cz>

Thank you!
/fuad
diff mbox series

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f34f4cfaa513..8b5f28f6efff 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2571,4 +2571,13 @@  long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 				    struct kvm_pre_fault_memory *range);
 #endif
 
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+void kvm_gmem_handle_folio_put(struct folio *folio);
+#else
+static inline void kvm_gmem_handle_folio_put(struct folio *folio)
+{
+	WARN_ON_ONCE(1);
+}
+#endif
+
 #endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6dc2494bd002..734afda268ab 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -933,6 +933,17 @@  enum pagetype {
 	PGTY_slab	= 0xf5,
 	PGTY_zsmalloc	= 0xf6,
 	PGTY_unaccepted	= 0xf7,
+	/*
+	 * guestmem folios are used to back VM memory as managed by guest_memfd.
+	 * Once the last reference is put, instead of freeing these folios back
+	 * to the page allocator, they are returned to guest_memfd.
+	 *
+	 * For now, guestmem will only be set on these folios as long as they
+	 * cannot be mapped to user space ("private state"), with the plan of
+	 * always setting that type once typed folios can be mapped to user
+	 * space cleanly.
+	 */
+	PGTY_guestmem	= 0xf8,
 
 	PGTY_mapcount_underflow = 0xff
 };
@@ -1082,6 +1093,12 @@  FOLIO_TYPE_OPS(hugetlb, hugetlb)
 FOLIO_TEST_FLAG_FALSE(hugetlb)
 #endif
 
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+FOLIO_TYPE_OPS(guestmem, guestmem)
+#else
+FOLIO_TEST_FLAG_FALSE(guestmem)
+#endif
+
 PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
 
 /*
diff --git a/mm/debug.c b/mm/debug.c
index 8d2acf432385..08bc42c6cba8 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -56,6 +56,7 @@  static const char *page_type_names[] = {
 	DEF_PAGETYPE_NAME(table),
 	DEF_PAGETYPE_NAME(buddy),
 	DEF_PAGETYPE_NAME(unaccepted),
+	DEF_PAGETYPE_NAME(guestmem),
 };
 
 static const char *page_type_name(unsigned int page_type)
diff --git a/mm/swap.c b/mm/swap.c
index 47bc1bb919cc..241880a46358 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -38,6 +38,10 @@ 
 #include <linux/local_lock.h>
 #include <linux/buffer_head.h>
 
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+#include <linux/kvm_host.h>
+#endif
+
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -101,6 +105,11 @@  static void free_typed_folio(struct folio *folio)
 	case PGTY_hugetlb:
 		free_huge_folio(folio);
 		return;
+#endif
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+	case PGTY_guestmem:
+		kvm_gmem_handle_folio_put(folio);
+		return;
 #endif
 	default:
 		WARN_ON_ONCE(1);
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index b2aa6bf24d3a..c6f6792bec2a 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -312,6 +312,13 @@  static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
 	return gfn - slot->base_gfn + slot->gmem.pgoff;
 }
 
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+void kvm_gmem_handle_folio_put(struct folio *folio)
+{
+	WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
+}
+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
+
 static struct file_operations kvm_gmem_fops = {
 	.open		= generic_file_open,
 	.release	= kvm_gmem_release,