diff mbox series

[RFC,v15,21/30] virt: gunyah: Add ioctl to bind guestmem to VMs

Message ID 20231215-gunyah-v15-21-192a5d872a30@quicinc.com (mailing list archive)
State New, archived
Headers show
Series Drivers for Gunyah hypervisor | expand

Commit Message

Elliot Berman Dec. 16, 2023, 12:21 a.m. UTC
A maple tree is used to maintain a map from guest address ranges to the
guestmemfd that provides the backing memory for that range of guest
addresses. The mapping of a guest address range to a guestmemfd is called a
binding. Implement an ioctl to add/remove bindings to the virtual
machine. The binding determines whether the memory is shared (host
retains access) or lent (host loses access).

Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
 drivers/virt/gunyah/guest_memfd.c | 277 ++++++++++++++++++++++++++++++++++++++
 drivers/virt/gunyah/vm_mgr.c      |  15 +++
 drivers/virt/gunyah/vm_mgr.h      |   6 +
 include/uapi/linux/gunyah.h       |  41 ++++++
 4 files changed, 339 insertions(+)
diff mbox series

Patch

diff --git a/drivers/virt/gunyah/guest_memfd.c b/drivers/virt/gunyah/guest_memfd.c
index 709aae9a1f44..c38380c4dc50 100644
--- a/drivers/virt/gunyah/guest_memfd.c
+++ b/drivers/virt/gunyah/guest_memfd.c
@@ -9,11 +9,61 @@ 
 #include <linux/types.h>
 #include <linux/falloc.h>
 #include <linux/file.h>
+#include <linux/maple_tree.h>
 #include <linux/migrate.h>
 #include <linux/pagemap.h>
 
 #include <uapi/linux/gunyah.h>
 
+#include "vm_mgr.h"
+
+/**
+ * struct gunyah_gmem_binding - Represents a binding of guestmem to a Gunyah VM
+ * @gfn: Guest address to place acquired folios
+ * @ghvm: Pointer to Gunyah VM in this binding
+ * @mt: Maple tree to track folios which have been provided to the VM
+ * @i_off: offset into the guestmem to grab folios from
+ * @inode: Pointer to guest mem inode
+ * @i_entry: list entry for inode->i_private_list
+ * @flags: Access flags for the binding
+ * @nr: Number of pages covered by this binding
+ */
+struct gunyah_gmem_binding {
+	u64 gfn;
+	struct gunyah_vm *ghvm;
+	struct maple_tree mt;
+
+	pgoff_t i_off;
+	struct inode *inode;
+	struct list_head i_entry;
+
+	u32 flags;
+	unsigned long nr;
+};
+
+static inline pgoff_t gunyah_gfn_to_off(struct gunyah_gmem_binding *b, u64 gfn)
+{
+	return gfn - b->gfn + b->i_off;
+}
+
+static inline u64 gunyah_off_to_gfn(struct gunyah_gmem_binding *b, pgoff_t off)
+{
+	return off - b->i_off + b->gfn;
+}
+
+static inline bool gunyah_guest_mem_is_lend(struct gunyah_vm *ghvm, u32 flags)
+{
+	u8 access = flags & GUNYAH_MEM_ACCESS_MASK;
+
+	if (access == GUNYAH_MEM_FORCE_LEND)
+		return true;
+	else if (access == GUNYAH_MEM_FORCE_SHARE)
+		return false;
+
+	/* RM requires all VMs to be protected (isolated) */
+	return true;
+}
+
 static struct folio *gunyah_gmem_get_huge_folio(struct inode *inode,
 						pgoff_t index)
 {
@@ -191,8 +241,15 @@  static long gunyah_gmem_fallocate(struct file *file, int mode, loff_t offset,
 
 static int gunyah_gmem_release(struct inode *inode, struct file *file)
 {
+	struct gunyah_gmem_binding *b, *n;
+
 	gunyah_gmem_punch_hole(inode, 0, U64_MAX);
 
+	list_for_each_entry_safe(b, n, &inode->i_mapping->i_private_list,
+				 i_entry) {
+		gunyah_gmem_remove_binding(b);
+	}
+
 	return 0;
 }
 
@@ -267,3 +324,223 @@  int gunyah_guest_mem_create(struct gunyah_create_mem_args *args)
 	put_unused_fd(fd);
 	return err;
 }
+
+void gunyah_gmem_remove_binding(struct gunyah_gmem_binding *b)
+{
+	mtree_erase(&b->ghvm->mem_layout, b->gfn);
+	list_del(&b->i_entry);
+	kfree(b);
+}
+
+static inline unsigned long gunyah_gmem_page_mask(struct inode *inode)
+{
+	unsigned long gmem_flags = (unsigned long)inode->i_private;
+
+	if (gmem_flags & GHMF_ALLOW_HUGEPAGE) {
+#if IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
+		return HPAGE_PMD_MASK;
+#else
+		return ULONG_MAX;
+#endif
+	}
+
+	return PAGE_MASK;
+}
+
+static int gunyah_gmem_init_binding(struct gunyah_vm *ghvm, struct inode *inode,
+				    struct gunyah_map_mem_args *args,
+				    struct gunyah_gmem_binding *binding)
+{
+	const unsigned long page_mask = ~gunyah_gmem_page_mask(inode);
+
+	if (args->flags & ~(GUNYAH_MEM_ALLOW_RWX | GUNYAH_MEM_ACCESS_MASK))
+		return -EINVAL;
+
+	if (args->guest_addr & page_mask)
+		return -EINVAL;
+
+	if (args->offset & page_mask)
+		return -EINVAL;
+
+	if (args->size & page_mask)
+		return -EINVAL;
+
+	binding->gfn = gunyah_gpa_to_gfn(args->guest_addr);
+	binding->ghvm = ghvm;
+	binding->i_off = args->offset >> PAGE_SHIFT;
+	binding->inode = inode;
+	binding->flags = args->flags;
+	binding->nr = args->size >> PAGE_SHIFT;
+
+	return 0;
+}
+
+static int gunyah_gmem_remove_mapping(struct gunyah_vm *ghvm,
+				      struct inode *inode,
+				      struct gunyah_map_mem_args *args)
+{
+	struct gunyah_gmem_binding argb;
+	struct gunyah_gmem_binding *b = NULL;
+	unsigned long start_delta, end_delta;
+	int ret;
+
+	ret = gunyah_gmem_init_binding(ghvm, inode, args, &argb);
+	if (ret)
+		return ret;
+
+	filemap_invalidate_lock(inode->i_mapping);
+	list_for_each_entry(b, &inode->i_mapping->i_private_list, i_entry) {
+		if (b->ghvm != argb.ghvm || b->flags != argb.flags ||
+		    WARN_ON(b->inode != argb.inode))
+			continue;
+		/* Check if argb guest addresses is within b */
+		if (b->gfn > argb.gfn)
+			continue;
+		if (b->gfn + b->nr < argb.gfn + argb.nr)
+			continue;
+		start_delta = argb.gfn - b->gfn;
+		if (argb.i_off - b->i_off != start_delta)
+			continue;
+		end_delta = argb.gfn + argb.nr - b->gfn - b->nr;
+		if (!start_delta && !end_delta) {
+			/* wipe the mapping entirely */
+			gunyah_gmem_remove_binding(b);
+			goto out;
+		} else if (start_delta && !end_delta) {
+			/* shrink the end */
+			down_write(&ghvm->mem_lock);
+			mtree_erase(&b->ghvm->mem_layout, b->gfn);
+			b->nr = start_delta;
+			ret = mtree_insert_range(&ghvm->mem_layout, b->gfn,
+						 b->gfn + b->nr - 1, b,
+						 GFP_KERNEL);
+			up_write(&ghvm->mem_lock);
+			goto out;
+		} else if (!start_delta && end_delta) {
+			/* Shrink the beginning */
+			down_write(&ghvm->mem_lock);
+			mtree_erase(&b->ghvm->mem_layout, b->gfn);
+			b->gfn += argb.nr;
+			b->i_off += argb.nr;
+			b->nr -= argb.nr;
+			ret = mtree_insert_range(&ghvm->mem_layout, b->gfn,
+						 b->gfn + b->nr - 1, b,
+						 GFP_KERNEL);
+			up_write(&ghvm->mem_lock);
+			goto out;
+		} else {
+			/* TODO: split the mapping into 2 */
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+	ret = -ENOENT;
+out:
+	filemap_invalidate_unlock(inode->i_mapping);
+	return ret;
+}
+
+static bool gunyah_gmem_binding_allowed_overlap(struct gunyah_gmem_binding *a,
+						struct gunyah_gmem_binding *b)
+{
+	/* Bindings can't overlap within a VM. Only one guest mem can
+	 * provide for a given guest address
+	 */
+	if (a->ghvm == b->ghvm && a->gfn < b->gfn + b->nr &&
+	    b->gfn < a->gfn + a->nr)
+		return false;
+
+	/* Gunyah only guarantees we can share a page with one VM and
+	 * doesn't (currently) allow us to share same page with multiple VMs,
+	 * regardless whether host can also access.
+	 */
+	if (a->inode == b->inode) {
+		if (a->ghvm == b->ghvm) {
+			if (gunyah_guest_mem_is_lend(a->ghvm, a->flags) ||
+			    gunyah_guest_mem_is_lend(b->ghvm, b->flags))
+				return false;
+		} else {
+			/* disallow overlapping file ranges across VMs */
+			if (a->i_off < b->i_off + b->nr &&
+			    b->i_off < a->i_off + a->nr)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static int gunyah_gmem_add_mapping(struct gunyah_vm *ghvm, struct inode *inode,
+				   struct gunyah_map_mem_args *args)
+{
+	struct gunyah_gmem_binding *b, *tmp = NULL;
+	int ret;
+
+	b = kzalloc(sizeof(*b), GFP_KERNEL);
+	if (!b)
+		return -ENOMEM;
+
+	ret = gunyah_gmem_init_binding(ghvm, inode, args, b);
+	if (ret) {
+		kfree(b);
+		return ret;
+	}
+
+	filemap_invalidate_lock(inode->i_mapping);
+	list_for_each_entry(tmp, &inode->i_mapping->i_private_list, i_entry) {
+		if (!gunyah_gmem_binding_allowed_overlap(b, tmp)) {
+			ret = -EEXIST;
+			goto unlock;
+		}
+	}
+
+	ret = mtree_insert_range(&ghvm->mem_layout, b->gfn, b->gfn + b->nr - 1,
+				 b, GFP_KERNEL);
+	if (ret)
+		goto unlock;
+
+	list_add(&b->i_entry, &inode->i_mapping->i_private_list);
+
+unlock:
+	filemap_invalidate_unlock(inode->i_mapping);
+	/* free the unpublished binding on any error path */
+	if (ret)
+		kfree(b);
+	return ret;
+}
+
+int gunyah_gmem_modify_binding(struct gunyah_vm *ghvm,
+			       struct gunyah_map_mem_args *args)
+{
+	u8 access = args->flags & GUNYAH_MEM_ACCESS_MASK;
+	struct file *file;
+	int ret = -EINVAL;
+
+	file = fget(args->guest_mem_fd);
+	if (!file)
+		return -EINVAL;
+
+	if (file->f_op != &gunyah_gmem_fops)
+		goto err_file;
+
+	if (args->flags & ~(GUNYAH_MEM_ALLOW_RWX | GUNYAH_MEM_UNMAP | GUNYAH_MEM_ACCESS_MASK))
+		goto err_file;
+
+	/* VM needs to have some permissions to the memory */
+	if (!(args->flags & GUNYAH_MEM_ALLOW_RWX))
+		goto err_file;
+
+	if (access != GUNYAH_MEM_DEFAULT_ACCESS &&
+	    access != GUNYAH_MEM_FORCE_LEND && access != GUNYAH_MEM_FORCE_SHARE)
+		goto err_file;
+
+	if (!PAGE_ALIGNED(args->guest_addr) || !PAGE_ALIGNED(args->offset) ||
+	    !PAGE_ALIGNED(args->size))
+		goto err_file;
+
+	if (args->flags & GUNYAH_MEM_UNMAP) {
+		args->flags &= ~GUNYAH_MEM_UNMAP;
+		ret = gunyah_gmem_remove_mapping(ghvm, file_inode(file), args);
+	} else {
+		ret = gunyah_gmem_add_mapping(ghvm, file_inode(file), args);
+	}
+
+err_file:
+	fput(file);
+	return ret;
+}
diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
index cd978d1ce93f..5666070453aa 100644
--- a/drivers/virt/gunyah/vm_mgr.c
+++ b/drivers/virt/gunyah/vm_mgr.c
@@ -519,6 +519,8 @@  static __must_check struct gunyah_vm *gunyah_vm_alloc(struct gunyah_rm *rm)
 	mutex_init(&ghvm->fn_lock);
 
 	mt_init(&ghvm->gm);
+	mt_init(&ghvm->mem_layout);
+	init_rwsem(&ghvm->mem_lock);
 
 	ghvm->addrspace_ticket.resource_type = GUNYAH_RESOURCE_TYPE_ADDR_SPACE;
 	ghvm->addrspace_ticket.label = GUNYAH_VM_ADDRSPACE_LABEL;
@@ -673,6 +675,14 @@  static long gunyah_vm_ioctl(struct file *filp, unsigned int cmd,
 		r = gunyah_vm_rm_function_instance(ghvm, &f);
 		break;
 	}
+	case GUNYAH_VM_MAP_MEM: {
+		struct gunyah_map_mem_args args;
+
+		if (copy_from_user(&args, argp, sizeof(args)))
+			return -EFAULT;
+
+		return gunyah_gmem_modify_binding(ghvm, &args);
+	}
 	default:
 		r = -ENOTTY;
 		break;
@@ -690,6 +700,8 @@  EXPORT_SYMBOL_GPL(gunyah_vm_get);
 static void _gunyah_vm_put(struct kref *kref)
 {
 	struct gunyah_vm *ghvm = container_of(kref, struct gunyah_vm, kref);
+	struct gunyah_gmem_binding *b;
+	unsigned long idx = 0;
 	int ret;
 
 	if (ghvm->vm_status == GUNYAH_RM_VM_STATUS_RUNNING)
@@ -697,6 +709,9 @@  static void _gunyah_vm_put(struct kref *kref)
 
 	gunyah_vm_remove_functions(ghvm);
 
+	mt_for_each(&ghvm->mem_layout, b, idx, ULONG_MAX)
+		gunyah_gmem_remove_binding(b);
+	mtree_destroy(&ghvm->mem_layout);
 	gunyah_vm_reclaim_memory(ghvm);
 
 	gunyah_vm_remove_resource_ticket(ghvm, &ghvm->addrspace_ticket);
diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
index d26693d10d22..8f1c3ade08dd 100644
--- a/drivers/virt/gunyah/vm_mgr.h
+++ b/drivers/virt/gunyah/vm_mgr.h
@@ -36,6 +36,8 @@  long gunyah_dev_vm_mgr_ioctl(struct gunyah_rm *rm, unsigned int cmd,
 struct gunyah_vm {
 	u16 vmid;
 	struct maple_tree gm;
+	struct maple_tree mem_layout;
+	struct rw_semaphore mem_lock;
 	struct gunyah_vm_resource_ticket addrspace_ticket,
 		host_private_extent_ticket, host_shared_extent_ticket,
 		guest_private_extent_ticket, guest_shared_extent_ticket;
@@ -78,5 +80,9 @@  void gunyah_vm_reclaim_memory(struct gunyah_vm *ghvm);
 int gunyah_vm_mmio_write(struct gunyah_vm *ghvm, u64 addr, u32 len, u64 data);
 
 int gunyah_guest_mem_create(struct gunyah_create_mem_args *args);
+int gunyah_gmem_modify_binding(struct gunyah_vm *ghvm,
+			       struct gunyah_map_mem_args *args);
+struct gunyah_gmem_binding;
+void gunyah_gmem_remove_binding(struct gunyah_gmem_binding *binding);
 
 #endif
diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
index c5f506350364..1af4c5ae6bc3 100644
--- a/include/uapi/linux/gunyah.h
+++ b/include/uapi/linux/gunyah.h
@@ -87,6 +87,47 @@  struct gunyah_fn_desc {
 #define GUNYAH_VM_ADD_FUNCTION	_IOW(GUNYAH_IOCTL_TYPE, 0x4, struct gunyah_fn_desc)
 #define GUNYAH_VM_REMOVE_FUNCTION	_IOW(GUNYAH_IOCTL_TYPE, 0x7, struct gunyah_fn_desc)
 
+/**
+ * enum gunyah_map_flags - Possible flags on &struct gunyah_map_mem_args
+ * @GUNYAH_MEM_DEFAULT_ACCESS: Use default host access for the VM type
+ * @GUNYAH_MEM_FORCE_LEND: Force unmapping the memory once the guest starts to use
+ * @GUNYAH_MEM_FORCE_SHARE: Allow host to continue accessing memory when guest starts to use
+ * @GUNYAH_MEM_ALLOW_READ: Allow guest to read memory
+ * @GUNYAH_MEM_ALLOW_WRITE: Allow guest to write to the memory
+ * @GUNYAH_MEM_ALLOW_EXEC: Allow guest to execute instructions in the memory
+ * @GUNYAH_MEM_ALLOW_RWX: Mask of the read, write, and execute permission flags
+ * @GUNYAH_MEM_UNMAP: Remove a previously added binding instead of adding one
+ */
+enum gunyah_map_flags {
+	GUNYAH_MEM_DEFAULT_ACCESS = 0,
+	GUNYAH_MEM_FORCE_LEND = 1,
+	GUNYAH_MEM_FORCE_SHARE = 2,
+#define GUNYAH_MEM_ACCESS_MASK 0x7
+
+	GUNYAH_MEM_ALLOW_READ = 1UL << 4,
+	GUNYAH_MEM_ALLOW_WRITE = 1UL << 5,
+	GUNYAH_MEM_ALLOW_EXEC = 1UL << 6,
+	GUNYAH_MEM_ALLOW_RWX =
+		(GUNYAH_MEM_ALLOW_READ | GUNYAH_MEM_ALLOW_WRITE | GUNYAH_MEM_ALLOW_EXEC),
+
+	GUNYAH_MEM_UNMAP = 1UL << 8,
+};
+
+/**
+ * struct gunyah_map_mem_args - Description to provide guest memory into a VM
+ * @guest_addr: Location in guest address space to place the memory
+ * @flags: See &enum gunyah_map_flags.
+ * @guest_mem_fd: File descriptor created by GUNYAH_CREATE_GUEST_MEM
+ * @offset: Offset into the guest memory file
+ * @size: Size of the memory region to bind, in bytes
+ */
+struct gunyah_map_mem_args {
+	__u64 guest_addr;
+	__u32 flags;
+	__u32 guest_mem_fd;
+	__u64 offset;
+	__u64 size;
+};
+
+#define GUNYAH_VM_MAP_MEM _IOW(GUNYAH_IOCTL_TYPE, 0x9, struct gunyah_map_mem_args)
+
 /*
  * ioctls for vCPU fds
  */