Message ID | 20241108155056.332412-2-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | KVM: gmem: track preparedness a page at a time | expand |
+Ackerley, who's also working on resurrecting the file system[*]. At a glance, there appear to be non-trivial differences, e.g. Ackerley's version has a call to security_inode_init_security_anon(). I've paged out much of the inode stuff, so I trust Ackerley's judgment far, far more than my own :-) [*] https://lore.kernel.org/all/d1940d466fc69472c8b6dda95df2e0522b2d8744.1726009989.git.ackerleytng@google.com On Fri, Nov 08, 2024, Paolo Bonzini wrote: > In preparation for removing the usage of the uptodate flag, > reintroduce the gmem filesystem type. We need it in order to > free the private inode information. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > include/uapi/linux/magic.h | 1 + > virt/kvm/guest_memfd.c | 117 +++++++++++++++++++++++++++++++++---- > virt/kvm/kvm_main.c | 7 ++- > virt/kvm/kvm_mm.h | 8 ++- > 4 files changed, 119 insertions(+), 14 deletions(-) > > diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h > index bb575f3ab45e..d856dd6a7ed9 100644 > --- a/include/uapi/linux/magic.h > +++ b/include/uapi/linux/magic.h > @@ -103,5 +103,6 @@ > #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ > #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ > #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ > +#define KVM_GUEST_MEM_MAGIC 0x474d454d /* "GMEM" */ > > #endif /* __LINUX_MAGIC_H__ */ > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > index 8f079a61a56d..3ea5a7597fd4 100644 > --- a/virt/kvm/guest_memfd.c > +++ b/virt/kvm/guest_memfd.c > @@ -4,9 +4,74 @@ > #include <linux/kvm_host.h> > #include <linux/pagemap.h> > #include <linux/anon_inodes.h> > +#include <linux/pseudo_fs.h> > > #include "kvm_mm.h" > > +/* Do all the filesystem crap just for evict_inode... 
*/ > + > +static struct vfsmount *kvm_gmem_mnt __read_mostly; > + > +static void gmem_evict_inode(struct inode *inode) > +{ > + kvfree(inode->i_private); > + truncate_inode_pages_final(&inode->i_data); > + clear_inode(inode); > +} > + > +static const struct super_operations gmem_super_operations = { > + .drop_inode = generic_delete_inode, > + .evict_inode = gmem_evict_inode, > + .statfs = simple_statfs, > +}; > + > +static int gmem_init_fs_context(struct fs_context *fc) > +{ > + struct pseudo_fs_context *ctx = init_pseudo(fc, KVM_GUEST_MEM_MAGIC); > + if (!ctx) > + return -ENOMEM; > + > + ctx->ops = &gmem_super_operations; > + return 0; > +} > + > +static struct file_system_type kvm_gmem_fs_type = { > + .name = "kvm_gmemfs", > + .init_fs_context = gmem_init_fs_context, > + .kill_sb = kill_anon_super, > +}; > + > +static struct file *kvm_gmem_create_file(const char *name, const struct file_operations *fops) > +{ > + struct inode *inode; > + struct file *file; > + > + if (fops->owner && !try_module_get(fops->owner)) > + return ERR_PTR(-ENOENT); > + > + inode = alloc_anon_inode(kvm_gmem_mnt->mnt_sb); > + if (IS_ERR(inode)) { > + file = ERR_CAST(inode); > + goto err; > + } > + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, fops); > + if (IS_ERR(file)) > + goto err_iput; > + > + return file; > + > +err_iput: > + iput(inode); > +err: > + module_put(fops->owner); > + return file; > +} > + > + > +struct kvm_gmem_inode { > + unsigned long flags; > +}; > + > struct kvm_gmem { > struct kvm *kvm; > struct xarray bindings; > @@ -308,9 +373,31 @@ static struct file_operations kvm_gmem_fops = { > .fallocate = kvm_gmem_fallocate, > }; > > -void kvm_gmem_init(struct module *module) > +int kvm_gmem_init(struct module *module) > { > + int ret; > + > + ret = register_filesystem(&kvm_gmem_fs_type); > + if (ret) { > + pr_err("kvm-gmem: cannot register file system (%d)\n", ret); > + return ret; > + } > + > + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs_type); > + if 
(IS_ERR(kvm_gmem_mnt)) { > + pr_err("kvm-gmem: kernel mount failed (%ld)\n", PTR_ERR(kvm_gmem_mnt)); > + return PTR_ERR(kvm_gmem_mnt); > + } > + > kvm_gmem_fops.owner = module; > + > + return 0; > +} > + > +void kvm_gmem_exit(void) > +{ > + kern_unmount(kvm_gmem_mnt); > + unregister_filesystem(&kvm_gmem_fs_type); > } > > static int kvm_gmem_migrate_folio(struct address_space *mapping, > @@ -394,15 +481,23 @@ static const struct inode_operations kvm_gmem_iops = { > > static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) > { > - const char *anon_name = "[kvm-gmem]"; > + const char *gmem_name = "[kvm-gmem]"; > + struct kvm_gmem_inode *i_gmem; > struct kvm_gmem *gmem; > struct inode *inode; > struct file *file; > int fd, err; > > + i_gmem = kvzalloc(sizeof(struct kvm_gmem_inode), GFP_KERNEL); > + if (!i_gmem) > + return -ENOMEM; > + i_gmem->flags = flags; > + > fd = get_unused_fd_flags(0); > - if (fd < 0) > - return fd; > + if (fd < 0) { > + err = fd; > + goto err_i_gmem; > + } > > gmem = kzalloc(sizeof(*gmem), GFP_KERNEL); > if (!gmem) { > @@ -410,19 +505,19 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) > goto err_fd; > } > > - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem, > - O_RDWR, NULL); > + file = kvm_gmem_create_file(gmem_name, &kvm_gmem_fops); > if (IS_ERR(file)) { > err = PTR_ERR(file); > goto err_gmem; > } > > + inode = file->f_inode; > + > + file->f_mapping = inode->i_mapping; > + file->private_data = gmem; > file->f_flags |= O_LARGEFILE; > > - inode = file->f_inode; > - WARN_ON(file->f_mapping != inode->i_mapping); > - > - inode->i_private = (void *)(unsigned long)flags; > + inode->i_private = i_gmem; > inode->i_op = &kvm_gmem_iops; > inode->i_mapping->a_ops = &kvm_gmem_aops; > inode->i_mode |= S_IFREG; > @@ -444,6 +539,8 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) > kfree(gmem); > err_fd: > put_unused_fd(fd); > +err_i_gmem: > + kvfree(i_gmem); > return err; > } > > 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 279e03029ce1..8b7b4e0eb639 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -6504,7 +6504,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) > if (WARN_ON_ONCE(r)) > goto err_vfio; > > - kvm_gmem_init(module); > + r = kvm_gmem_init(module); > + if (r) > + goto err_gmem; > > r = kvm_init_virtualization(); > if (r) > @@ -6525,6 +6527,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) > err_register: > kvm_uninit_virtualization(); > err_virt: > + kvm_gmem_exit(); > +err_gmem: > kvm_vfio_ops_exit(); > err_vfio: > kvm_async_pf_deinit(); > @@ -6556,6 +6560,7 @@ void kvm_exit(void) > for_each_possible_cpu(cpu) > free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); > kmem_cache_destroy(kvm_vcpu_cache); > + kvm_gmem_exit(); > kvm_vfio_ops_exit(); > kvm_async_pf_deinit(); > kvm_irqfd_exit(); > diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h > index 715f19669d01..91e4202574a8 100644 > --- a/virt/kvm/kvm_mm.h > +++ b/virt/kvm/kvm_mm.h > @@ -36,15 +36,17 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, > #endif /* HAVE_KVM_PFNCACHE */ > > #ifdef CONFIG_KVM_PRIVATE_MEM > -void kvm_gmem_init(struct module *module); > +int kvm_gmem_init(struct module *module); > +void kvm_gmem_exit(void); > int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args); > int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, > unsigned int fd, loff_t offset); > void kvm_gmem_unbind(struct kvm_memory_slot *slot); > #else > -static inline void kvm_gmem_init(struct module *module) > +static inline void kvm_gmem_exit(void) {} > +static inline int kvm_gmem_init(struct module *module) > { > - > + return 0; > } > > static inline int kvm_gmem_bind(struct kvm *kvm, > -- > 2.43.5 > >
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index bb575f3ab45e..d856dd6a7ed9 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -103,5 +103,6 @@ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ +#define KVM_GUEST_MEM_MAGIC 0x474d454d /* "GMEM" */ #endif /* __LINUX_MAGIC_H__ */ diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 8f079a61a56d..3ea5a7597fd4 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -4,9 +4,74 @@ #include <linux/kvm_host.h> #include <linux/pagemap.h> #include <linux/anon_inodes.h> +#include <linux/pseudo_fs.h> #include "kvm_mm.h" +/* Do all the filesystem crap just for evict_inode... */ + +static struct vfsmount *kvm_gmem_mnt __read_mostly; + +static void gmem_evict_inode(struct inode *inode) +{ + kvfree(inode->i_private); + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); +} + +static const struct super_operations gmem_super_operations = { + .drop_inode = generic_delete_inode, + .evict_inode = gmem_evict_inode, + .statfs = simple_statfs, +}; + +static int gmem_init_fs_context(struct fs_context *fc) +{ + struct pseudo_fs_context *ctx = init_pseudo(fc, KVM_GUEST_MEM_MAGIC); + if (!ctx) + return -ENOMEM; + + ctx->ops = &gmem_super_operations; + return 0; +} + +static struct file_system_type kvm_gmem_fs_type = { + .name = "kvm_gmemfs", + .init_fs_context = gmem_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static struct file *kvm_gmem_create_file(const char *name, const struct file_operations *fops) +{ + struct inode *inode; + struct file *file; + + if (fops->owner && !try_module_get(fops->owner)) + return ERR_PTR(-ENOENT); + + inode = alloc_anon_inode(kvm_gmem_mnt->mnt_sb); + if (IS_ERR(inode)) { + file = ERR_CAST(inode); + goto err; + } + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, fops); + if (IS_ERR(file)) + goto err_iput; + + return 
file; + +err_iput: + iput(inode); +err: + module_put(fops->owner); + return file; +} + + +struct kvm_gmem_inode { + unsigned long flags; +}; + struct kvm_gmem { struct kvm *kvm; struct xarray bindings; @@ -308,9 +373,31 @@ static struct file_operations kvm_gmem_fops = { .fallocate = kvm_gmem_fallocate, }; -void kvm_gmem_init(struct module *module) +int kvm_gmem_init(struct module *module) { + int ret; + + ret = register_filesystem(&kvm_gmem_fs_type); + if (ret) { + pr_err("kvm-gmem: cannot register file system (%d)\n", ret); + return ret; + } + + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs_type); + if (IS_ERR(kvm_gmem_mnt)) { + pr_err("kvm-gmem: kernel mount failed (%ld)\n", PTR_ERR(kvm_gmem_mnt)); + return PTR_ERR(kvm_gmem_mnt); + } + kvm_gmem_fops.owner = module; + + return 0; +} + +void kvm_gmem_exit(void) +{ + kern_unmount(kvm_gmem_mnt); + unregister_filesystem(&kvm_gmem_fs_type); } static int kvm_gmem_migrate_folio(struct address_space *mapping, @@ -394,15 +481,23 @@ static const struct inode_operations kvm_gmem_iops = { static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) { - const char *anon_name = "[kvm-gmem]"; + const char *gmem_name = "[kvm-gmem]"; + struct kvm_gmem_inode *i_gmem; struct kvm_gmem *gmem; struct inode *inode; struct file *file; int fd, err; + i_gmem = kvzalloc(sizeof(struct kvm_gmem_inode), GFP_KERNEL); + if (!i_gmem) + return -ENOMEM; + i_gmem->flags = flags; + fd = get_unused_fd_flags(0); - if (fd < 0) - return fd; + if (fd < 0) { + err = fd; + goto err_i_gmem; + } gmem = kzalloc(sizeof(*gmem), GFP_KERNEL); if (!gmem) { @@ -410,19 +505,19 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) goto err_fd; } - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem, - O_RDWR, NULL); + file = kvm_gmem_create_file(gmem_name, &kvm_gmem_fops); if (IS_ERR(file)) { err = PTR_ERR(file); goto err_gmem; } + inode = file->f_inode; + + file->f_mapping = inode->i_mapping; + file->private_data = gmem; 
file->f_flags |= O_LARGEFILE; - inode = file->f_inode; - WARN_ON(file->f_mapping != inode->i_mapping); - - inode->i_private = (void *)(unsigned long)flags; + inode->i_private = i_gmem; inode->i_op = &kvm_gmem_iops; inode->i_mapping->a_ops = &kvm_gmem_aops; inode->i_mode |= S_IFREG; @@ -444,6 +539,8 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) kfree(gmem); err_fd: put_unused_fd(fd); +err_i_gmem: + kvfree(i_gmem); return err; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 279e03029ce1..8b7b4e0eb639 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -6504,7 +6504,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) if (WARN_ON_ONCE(r)) goto err_vfio; - kvm_gmem_init(module); + r = kvm_gmem_init(module); + if (r) + goto err_gmem; r = kvm_init_virtualization(); if (r) @@ -6525,6 +6527,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) err_register: kvm_uninit_virtualization(); err_virt: + kvm_gmem_exit(); +err_gmem: kvm_vfio_ops_exit(); err_vfio: kvm_async_pf_deinit(); @@ -6556,6 +6560,7 @@ void kvm_exit(void) for_each_possible_cpu(cpu) free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); kmem_cache_destroy(kvm_vcpu_cache); + kvm_gmem_exit(); kvm_vfio_ops_exit(); kvm_async_pf_deinit(); kvm_irqfd_exit(); diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h index 715f19669d01..91e4202574a8 100644 --- a/virt/kvm/kvm_mm.h +++ b/virt/kvm/kvm_mm.h @@ -36,15 +36,17 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, #endif /* HAVE_KVM_PFNCACHE */ #ifdef CONFIG_KVM_PRIVATE_MEM -void kvm_gmem_init(struct module *module); +int kvm_gmem_init(struct module *module); +void kvm_gmem_exit(void); int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args); int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned int fd, loff_t offset); void kvm_gmem_unbind(struct kvm_memory_slot *slot); #else -static inline void 
kvm_gmem_init(struct module *module) +static inline void kvm_gmem_exit(void) {} +static inline int kvm_gmem_init(struct module *module) { - + return 0; } static inline int kvm_gmem_bind(struct kvm *kvm,
In preparation for removing the usage of the uptodate flag,
reintroduce the gmem filesystem type. We need it in order to
free the private inode information.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/magic.h |   1 +
 virt/kvm/guest_memfd.c     | 117 +++++++++++++++++++++++++++++++++----
 virt/kvm/kvm_main.c        |   7 ++-
 virt/kvm/kvm_mm.h          |   8 ++-
 4 files changed, 119 insertions(+), 14 deletions(-)