diff mbox series

[v9,13/27] gunyah: vm_mgr: Add/remove user memory regions

Message ID 20230120224627.4053418-14-quic_eberman@quicinc.com (mailing list archive)
State New, archived
Headers show
Series Drivers for gunyah hypervisor | expand

Commit Message

Elliot Berman Jan. 20, 2023, 10:46 p.m. UTC
When launching a virtual machine, Gunyah userspace allocates memory for
the guest and informs Gunyah about these memory regions through
SET_USER_MEMORY_REGION ioctl.

Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
 drivers/virt/gunyah/Makefile    |   2 +-
 drivers/virt/gunyah/vm_mgr.c    |  46 +++++++
 drivers/virt/gunyah/vm_mgr.h    |  28 +++-
 drivers/virt/gunyah/vm_mgr_mm.c | 223 ++++++++++++++++++++++++++++++++
 include/uapi/linux/gunyah.h     |  22 ++++
 5 files changed, 319 insertions(+), 2 deletions(-)
 create mode 100644 drivers/virt/gunyah/vm_mgr_mm.c

Comments

Srivatsa Vaddagiri Jan. 25, 2023, 1:34 p.m. UTC | #1
* Elliot Berman <quic_eberman@quicinc.com> [2023-01-20 14:46:12]:

> +	/* Check for overlap */
> +	list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
> +		if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
> +			tmp_mapping->guest_phys_addr) ||
> +			(mapping->guest_phys_addr >=
> +			tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
> +			ret = -EEXIST;
> +			goto unlock;
> +		}
> +	}
> +
> +	list_add(&mapping->list, &ghvm->memory_mappings);

I think the potential race condition described last time is still possible. Pls
check.

> +unlock:
> +	mutex_unlock(&ghvm->mm_lock);
> +	if (ret)
> +		goto free_mapping;
> +
> +	mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
> +	if (!mapping->pages) {
> +		ret = -ENOMEM;
> +		goto reclaim;

Same comment as last time. Can you check this error path? We seem to call
unpin_user_page() in this path, which is not correct.

> +	}
> +
> +	pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
> +					FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
> +	if (pinned < 0) {
> +		ret = pinned;
> +		goto reclaim;

Same comment as above.
Elliot Berman Jan. 30, 2023, 9:46 p.m. UTC | #2
On 1/25/2023 5:34 AM, Srivatsa Vaddagiri wrote:
> * Elliot Berman <quic_eberman@quicinc.com> [2023-01-20 14:46:12]:
> 
>> +	/* Check for overlap */
>> +	list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
>> +		if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
>> +			tmp_mapping->guest_phys_addr) ||
>> +			(mapping->guest_phys_addr >=
>> +			tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
>> +			ret = -EEXIST;
>> +			goto unlock;
>> +		}
>> +	}
>> +
>> +	list_add(&mapping->list, &ghvm->memory_mappings);
> 
> I think the potential race condition described last time is still possible. Pls
> check.
> 
>> +unlock:
>> +	mutex_unlock(&ghvm->mm_lock);
>> +	if (ret)
>> +		goto free_mapping;
>> +
>> +	mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
>> +	if (!mapping->pages) {
>> +		ret = -ENOMEM;
>> +		goto reclaim;
> 
> Same comment as last time. Can you check this error path? We seem to call
> unpin_user_page() in this path, which is not correct.
> 
>> +	}
>> +
>> +	pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
>> +					FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
>> +	if (pinned < 0) {
>> +		ret = pinned;
>> +		goto reclaim;
> 
> Same comment as above.
> 

Caught the race condition you are describing between adding and removing 
the memory parcels. Should be fixed for v10.

- Elliot
Srinivas Kandagatla Feb. 6, 2023, 4:12 p.m. UTC | #3
On 20/01/2023 22:46, Elliot Berman wrote:
> When launching a virtual machine, Gunyah userspace allocates memory for
> the guest and informs Gunyah about these memory regions through
> SET_USER_MEMORY_REGION ioctl.
> 
> Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
>   drivers/virt/gunyah/Makefile    |   2 +-
>   drivers/virt/gunyah/vm_mgr.c    |  46 +++++++
>   drivers/virt/gunyah/vm_mgr.h    |  28 +++-
>   drivers/virt/gunyah/vm_mgr_mm.c | 223 ++++++++++++++++++++++++++++++++
>   include/uapi/linux/gunyah.h     |  22 ++++
>   5 files changed, 319 insertions(+), 2 deletions(-)
>   create mode 100644 drivers/virt/gunyah/vm_mgr_mm.c
> 
> diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
> index 03951cf82023..ff8bc4925392 100644
> --- a/drivers/virt/gunyah/Makefile
> +++ b/drivers/virt/gunyah/Makefile
> @@ -2,5 +2,5 @@
>   
>   obj-$(CONFIG_GUNYAH) += gunyah.o
>   
> -gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o
> +gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
>   obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
> diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
> index 0864dbd77e28..b847fde63333 100644
> --- a/drivers/virt/gunyah/vm_mgr.c
> +++ b/drivers/virt/gunyah/vm_mgr.c
> @@ -35,14 +35,55 @@ static __must_check struct gunyah_vm *gunyah_vm_alloc(struct gh_rm_rpc *rm)
>   	ghvm->vmid = vmid;
>   	ghvm->rm = rm;
>   
> +	mutex_init(&ghvm->mm_lock);
> +	INIT_LIST_HEAD(&ghvm->memory_mappings);
> +
>   	return ghvm;
>   }
>   
>   static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>   {
> +	struct gunyah_vm *ghvm = filp->private_data;
> +	void __user *argp = (void __user *)arg;
>   	long r;
>   
>   	switch (cmd) {
> +	case GH_VM_SET_USER_MEM_REGION: {
> +		struct gunyah_vm_memory_mapping *mapping;
> +		struct gh_userspace_memory_region region;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&region, argp, sizeof(region)))
> +			break;
Why not be explict about the error codes, do something like.

if (copy_from_user(&region, argp, sizeof(region)))
	return -EFAULT;


setting r value everytime before starting any code is making the code 
more reader unfriendly.


> +
> +		r = -EINVAL;
> +		/* All other flag bits are reserved for future use */
> +		if (region.flags & ~(GH_MEM_ALLOW_READ | GH_MEM_ALLOW_WRITE | GH_MEM_ALLOW_EXEC |
> +			GH_MEM_LENT))
> +			break;
> +
> +
> +		if (region.memory_size) {

This behaviour allocating memory in presense of valid memory_size and 
finding memory in cases of zero size needs to be described properly in 
the uapi so that the users are aware of this.

> +			r = 0;
> +			mapping = gh_vm_mem_mapping_alloc(ghvm, &region);
> +			if (IS_ERR(mapping)) {
> +				r = PTR_ERR(mapping);
> +				break;
> +			}
> +		} else {
> +			mapping = gh_vm_mem_mapping_find(ghvm, region.label);
> +			if (IS_ERR(mapping)) {
> +				r = PTR_ERR(mapping);
> +				break;
> +			}
> +			r = 0;
> +			if (!mapping)
> +				break;
> +			gh_vm_mem_mapping_reclaim(ghvm, mapping);
> +			kfree(mapping);
> +		}
> +		break;
> +	}
>   	default:
>   		r = -ENOTTY;
>   		break;
> @@ -54,7 +95,12 @@ static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>   static int gh_vm_release(struct inode *inode, struct file *filp)
>   {
>   	struct gunyah_vm *ghvm = filp->private_data;
> +	struct gunyah_vm_memory_mapping *mapping, *tmp;
>   
Locking?
> +	list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
> +		gh_vm_mem_mapping_reclaim(ghvm, mapping);
> +		kfree(mapping);
> +	}
>   	put_gh_rm(ghvm->rm);
>   	kfree(ghvm);
>   	return 0;
> diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
> index e47f34de7f9e..6b38bf780f76 100644
> --- a/drivers/virt/gunyah/vm_mgr.h
> +++ b/drivers/virt/gunyah/vm_mgr.h
> @@ -7,14 +7,40 @@
>   #define _GH_PRIV_VM_MGR_H
>   
>   #include <linux/gunyah_rsc_mgr.h>
> +#include <linux/list.h>
> +#include <linux/miscdevice.h>
> +#include <linux/mutex.h>
>   
>   #include <uapi/linux/gunyah.h>
>   
>   long gh_dev_vm_mgr_ioctl(struct gh_rm *rm, unsigned int cmd, unsigned long arg);
>   
> +enum gunyah_vm_mem_share_type {
> +	VM_MEM_SHARE,
> +	VM_MEM_LEND,
> +};
> +
> +struct gunyah_vm_memory_mapping {
> +	struct list_head list;
> +	enum gunyah_vm_mem_share_type share_type;
> +	struct gh_rm_mem_parcel parcel;
> +
> +	__u64 guest_phys_addr;
> +	struct page **pages;
> +	unsigned long npages;
> +};
> +
>   struct gunyah_vm {
>   	u16 vmid;
> -	struct gh_rm_rpc *rm;
> +	struct gh_rm *rm;
> +
> +	struct mutex mm_lock;
> +	struct list_head memory_mappings;
>   };
>   
> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
> +							struct gh_userspace_memory_region *region);
> +void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping);
> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label);
> +
>   #endif
> diff --git a/drivers/virt/gunyah/vm_mgr_mm.c b/drivers/virt/gunyah/vm_mgr_mm.c
> new file mode 100644
> index 000000000000..f2dbdb4ee8ab
> --- /dev/null
> +++ b/drivers/virt/gunyah/vm_mgr_mm.c
> @@ -0,0 +1,223 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
> + */
> +
> +#define pr_fmt(fmt) "gh_vm_mgr: " fmt
> +
> +#include <linux/gunyah_rsc_mgr.h>
> +#include <linux/mm.h>
> +
> +#include <uapi/linux/gunyah.h>
> +
> +#include "vm_mgr.h"
> +
> +static inline bool page_contiguous(phys_addr_t p, phys_addr_t t)
> +{
> +	return t - p == PAGE_SIZE;
> +}
> +
> +static struct gunyah_vm_memory_mapping *__gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
> +{
> +	struct gunyah_vm_memory_mapping *mapping;
> +
> +
only one line.
> +	list_for_each_entry(mapping, &ghvm->memory_mappings, list)
> +		if (mapping->parcel.label == label)
> +			return mapping;
> +
> +	return NULL;
> +}
> +
> +void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping)
> +{
> +	int i, ret = 0;
> +
> +	if (mapping->parcel.mem_handle != GH_MEM_HANDLE_INVAL) {
> +		ret = gh_rm_mem_reclaim(ghvm->rm, &mapping->parcel);
> +		if (ret)
> +			pr_warn("Failed to reclaim memory parcel for label %d: %d\n",
> +				mapping->parcel.label, ret);
> +	}
> +
> +	if (!ret)
> +		for (i = 0; i < mapping->npages; i++)
> +			unpin_user_page(mapping->pages[i]);
> +
> +	kfree(mapping->pages);
> +	kfree(mapping->parcel.acl_entries);
> +	kfree(mapping->parcel.mem_entries);
> +
> +	mutex_lock(&ghvm->mm_lock);
> +	list_del(&mapping->list);
> +	mutex_unlock(&ghvm->mm_lock);
> +}
> +
> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
> +{
> +	struct gunyah_vm_memory_mapping *mapping;
> +	int ret;
> +
> +	ret = mutex_lock_interruptible(&ghvm->mm_lock);
> +	if (ret)
> +		return ERR_PTR(ret);
> +	mapping = __gh_vm_mem_mapping_find(ghvm, label);
> +	mutex_unlock(&ghvm->mm_lock);
> +	return mapping ? : ERR_PTR(-ENODEV);
> +}
> +
> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
> +							struct gh_userspace_memory_region *region)
> +{
Is this a static functoin or an exported symbol?

> +	phys_addr_t curr_page, prev_page;
> +	struct gunyah_vm_memory_mapping *mapping, *tmp_mapping;
> +	struct gh_rm_mem_entry *mem_entries;
> +	int i, j, pinned, ret = 0;
> +	struct gh_rm_mem_parcel *parcel;
> +	size_t entry_size;
> +	u16 vmid; > +
Reverse christmas tree to sor local variables would be nice.

> +	if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
> +		!PAGE_ALIGNED(region->userspace_addr))
Even this alignment needs some documentation.

Or why not just let the user only pass number of pages instead of size?


> +		return ERR_PTR(-EINVAL);
> +
> +	if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
> +		return ERR_PTR(-EOPNOTSUPP);

We should proabably move this as very first check while handling this IOCTL.


> +
> +	ret = mutex_lock_interruptible(&ghvm->mm_lock);
> +	if (ret)
> +		return ERR_PTR(ret);
> +	mapping = __gh_vm_mem_mapping_find(ghvm, region->label);

so label is unique and userspace proabably aware of this?
Can we have more than one userspace doing this? and if so how can it 
ensure that each label is unique?


> +	if (mapping) {
> +		mutex_unlock(&ghvm->mm_lock);
> +		return ERR_PTR(-EEXIST);
> +	}
> +
> +	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
> +	if (!mapping) {
> +		ret = -ENOMEM;
> +		goto unlock;
> +	}
> +
> +	mapping->parcel.label = region->label;

> +	mapping->guest_phys_addr = region->guest_phys_addr;
> +	mapping->npages = region->memory_size >> PAGE_SHIFT;
> +	parcel = &mapping->parcel;
> +	parcel->mem_handle = GH_MEM_HANDLE_INVAL; /* to be filled later by mem_share/mem_lend */
> +	parcel->mem_type = GH_RM_MEM_TYPE_NORMAL;
> +
> +	/* Check for overlap */
> +	list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
> +		if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
> +			tmp_mapping->guest_phys_addr) ||
> +			(mapping->guest_phys_addr >=
> +			tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
> +			ret = -EEXIST;
> +			goto unlock;
> +		}
> +	}
This looks like we will loop every mappign for each allocation giving us 
an O(n), How frequent and how many max mappings can be there in the system?

> +
> +	list_add(&mapping->list, &ghvm->memory_mappings);
> +unlock:
> +	mutex_unlock(&ghvm->mm_lock);
> +	if (ret)
> +		goto free_mapping;
> +
> +	mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
> +	if (!mapping->pages) {
> +		ret = -ENOMEM;
> +		goto reclaim;
> +	}
> +
> +	pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
> +					FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
> +	if (pinned < 0) {
> +		ret = pinned;
> +		goto reclaim;
> +	} else if (pinned != mapping->npages) {
> +		ret = -EFAULT;
> +		mapping->npages = pinned; /* update npages for reclaim */
> +		goto reclaim;
> +	}
> +
> +	if (region->flags & GH_MEM_LENT) {
> +		parcel->n_acl_entries = 1;
> +		mapping->share_type = VM_MEM_LEND;
> +	} else {
> +		parcel->n_acl_entries = 2;
> +		mapping->share_type = VM_MEM_SHARE;
> +	}
> +	parcel->acl_entries = kcalloc(parcel->n_acl_entries,
> +						sizeof(*parcel->acl_entries),
> +						GFP_KERNEL);
> +	if (!parcel->acl_entries) {
> +		ret = -ENOMEM;
> +		goto reclaim;
> +	}
> +
> +	parcel->acl_entries[0].vmid = cpu_to_le16(ghvm->vmid);
> +	if (region->flags & GH_MEM_ALLOW_READ)
> +		parcel->acl_entries[0].perms |= GH_RM_ACL_R;
> +	if (region->flags & GH_MEM_ALLOW_WRITE)
> +		parcel->acl_entries[0].perms |= GH_RM_ACL_W;
> +	if (region->flags & GH_MEM_ALLOW_EXEC)
> +		parcel->acl_entries[0].perms |= GH_RM_ACL_X;
> +
> +	if (mapping->share_type == VM_MEM_SHARE) {
> +		ret = gh_rm_get_vmid(ghvm->rm, &vmid);
> +		if (ret) {
> +			if (ret > 0) {
> +				pr_warn("RM failed to get this VM's VMID: %d", ret);
> +				ret = -EINVAL;
> +			}
> +			goto reclaim;
> +		}
> +
> +		parcel->acl_entries[1].vmid = cpu_to_le16(vmid);
> +		/* Host assumed to have all these permissions. Gunyah will not
> +		 * grant new permissions if host actually had less than RWX
> +		 */
> +		parcel->acl_entries[1].perms |= GH_RM_ACL_R | GH_RM_ACL_W | GH_RM_ACL_X;
> +	}
> +
> +	mem_entries = kcalloc(mapping->npages, sizeof(*mem_entries), GFP_KERNEL);
> +	if (!mem_entries) {
> +		ret = -ENOMEM;
> +		goto reclaim;
> +	}
> +
> +	/* reduce number of entries by combining contiguous pages into single memory entry */
> +	prev_page = page_to_phys(mapping->pages[0]);
> +	mem_entries[0].ipa_base = cpu_to_le64(prev_page);
> +	entry_size = PAGE_SIZE;
> +	for (i = 1, j = 0; i < mapping->npages; i++) {
> +		curr_page = page_to_phys(mapping->pages[i]);
> +		if (page_contiguous(prev_page, curr_page)) {
> +			entry_size += PAGE_SIZE;
> +		} else {
> +			mem_entries[j].size = cpu_to_le64(entry_size);
> +			j++;
> +			mem_entries[j].ipa_base = cpu_to_le64(curr_page);
> +			entry_size = PAGE_SIZE;
> +		}
> +
> +		prev_page = curr_page;
> +	}
> +	mem_entries[j].size = cpu_to_le64(entry_size);
> +
> +	parcel->n_mem_entries = j + 1;
> +	parcel->mem_entries = kmemdup(mem_entries, sizeof(*mem_entries) * parcel->n_mem_entries,
> +					GFP_KERNEL);
> +	kfree(mem_entries);
> +	if (!parcel->mem_entries) {
> +		ret = -ENOMEM;
> +		goto reclaim;
> +	}
> +
> +	return mapping;
> +reclaim:
> +	gh_vm_mem_mapping_reclaim(ghvm, mapping);
> +free_mapping:
> +	kfree(mapping);
> +	return ERR_PTR(ret);
> +}
> diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
> index 88a40d6e0b96..574f33b198d0 100644
> --- a/include/uapi/linux/gunyah.h
> +++ b/include/uapi/linux/gunyah.h
> @@ -20,4 +20,26 @@
>    */
>   #define GH_CREATE_VM			_IO(GH_IOCTL_TYPE, 0x0) /* Returns a Gunyah VM fd */
>   
> +/*
> + * ioctls for VM fds
> + */
> +struct gh_userspace_memory_region {

This struct needs some kernedoc.

> +	__u32 label;

> +#define GH_MEM_ALLOW_READ	(1UL << 0)
> +#define GH_MEM_ALLOW_WRITE	(1UL << 1)
> +#define GH_MEM_ALLOW_EXEC	(1UL << 2)
> +/*
> + * The guest will be lent the memory instead of shared.
> + * In other words, the guest has exclusive access to the memory region and the host loses access.
> + */
> +#define GH_MEM_LENT		(1UL << 3)
> +	__u32 flags;
> +	__u64 guest_phys_addr;
> +	__u64 memory_size;
if we are only expecting pages, this should probably be make explict by 
using nr_pages instead of size

> +	__u64 userspace_addr;
> +};
> +
> +#define GH_VM_SET_USER_MEM_REGION	_IOW(GH_IOCTL_TYPE, 0x1, \
> +						struct gh_userspace_memory_region)
> +
>   #endif
Elliot Berman Feb. 6, 2023, 11:23 p.m. UTC | #4
On 2/6/2023 8:12 AM, Srinivas Kandagatla wrote:
> 
> 
> On 20/01/2023 22:46, Elliot Berman wrote:
>> When launching a virtual machine, Gunyah userspace allocates memory for
>> the guest and informs Gunyah about these memory regions through
>> SET_USER_MEMORY_REGION ioctl.
>>
>> Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
>> ---
>>   drivers/virt/gunyah/Makefile    |   2 +-
>>   drivers/virt/gunyah/vm_mgr.c    |  46 +++++++
>>   drivers/virt/gunyah/vm_mgr.h    |  28 +++-
>>   drivers/virt/gunyah/vm_mgr_mm.c | 223 ++++++++++++++++++++++++++++++++
>>   include/uapi/linux/gunyah.h     |  22 ++++
>>   5 files changed, 319 insertions(+), 2 deletions(-)
>>   create mode 100644 drivers/virt/gunyah/vm_mgr_mm.c
>>
>> diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
>> index 03951cf82023..ff8bc4925392 100644
>> --- a/drivers/virt/gunyah/Makefile
>> +++ b/drivers/virt/gunyah/Makefile
>> @@ -2,5 +2,5 @@
>>   obj-$(CONFIG_GUNYAH) += gunyah.o
>> -gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o
>> +gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
>>   obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
>> diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
>> index 0864dbd77e28..b847fde63333 100644
>> --- a/drivers/virt/gunyah/vm_mgr.c
>> +++ b/drivers/virt/gunyah/vm_mgr.c
>> @@ -35,14 +35,55 @@ static __must_check struct gunyah_vm 
>> *gunyah_vm_alloc(struct gh_rm_rpc *rm)
>>       ghvm->vmid = vmid;
>>       ghvm->rm = rm;
>> +    mutex_init(&ghvm->mm_lock);
>> +    INIT_LIST_HEAD(&ghvm->memory_mappings);
>> +
>>       return ghvm;
>>   }
>>   static long gh_vm_ioctl(struct file *filp, unsigned int cmd, 
>> unsigned long arg)
>>   {
>> +    struct gunyah_vm *ghvm = filp->private_data;
>> +    void __user *argp = (void __user *)arg;
>>       long r;
>>       switch (cmd) {
>> +    case GH_VM_SET_USER_MEM_REGION: {
>> +        struct gunyah_vm_memory_mapping *mapping;
>> +        struct gh_userspace_memory_region region;
>> +
>> +        r = -EFAULT;
>> +        if (copy_from_user(&region, argp, sizeof(region)))
>> +            break;
> Why not be explict about the error codes, do something like.
> 
> if (copy_from_user(&region, argp, sizeof(region)))
>      return -EFAULT;
> 
> 
> setting r value everytime before starting any code is making the code 
> more reader unfriendly.
> 
> 

Done.


>> +
>> +        r = -EINVAL;
>> +        /* All other flag bits are reserved for future use */
>> +        if (region.flags & ~(GH_MEM_ALLOW_READ | GH_MEM_ALLOW_WRITE | 
>> GH_MEM_ALLOW_EXEC |
>> +            GH_MEM_LENT))
>> +            break;
>> +
>> +
>> +        if (region.memory_size) {
> 
> This behaviour allocating memory in presense of valid memory_size and 
> finding memory in cases of zero size needs to be described properly in 
> the uapi so that the users are aware of this.
> 

This behavior is described in "Document Gunyah VM Manager": 
https://lore.kernel.org/all/20230120224627.4053418-20-quic_eberman@quicinc.com/

>> +            r = 0;
>> +            mapping = gh_vm_mem_mapping_alloc(ghvm, &region);
>> +            if (IS_ERR(mapping)) {
>> +                r = PTR_ERR(mapping);
>> +                break;
>> +            }
>> +        } else {
>> +            mapping = gh_vm_mem_mapping_find(ghvm, region.label);
>> +            if (IS_ERR(mapping)) {
>> +                r = PTR_ERR(mapping);
>> +                break;
>> +            }
>> +            r = 0;
>> +            if (!mapping)
>> +                break;
>> +            gh_vm_mem_mapping_reclaim(ghvm, mapping);
>> +            kfree(mapping);
>> +        }
>> +        break;
>> +    }
>>       default:
>>           r = -ENOTTY;
>>           break;
>> @@ -54,7 +95,12 @@ static long gh_vm_ioctl(struct file *filp, unsigned 
>> int cmd, unsigned long arg)
>>   static int gh_vm_release(struct inode *inode, struct file *filp)
>>   {
>>       struct gunyah_vm *ghvm = filp->private_data;
>> +    struct gunyah_vm_memory_mapping *mapping, *tmp;
> Locking?
>> +    list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, 
>> list) {
>> +        gh_vm_mem_mapping_reclaim(ghvm, mapping);
>> +        kfree(mapping);
>> +    }
>>       put_gh_rm(ghvm->rm);
>>       kfree(ghvm);
>>       return 0;
>> diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
>> index e47f34de7f9e..6b38bf780f76 100644
>> --- a/drivers/virt/gunyah/vm_mgr.h
>> +++ b/drivers/virt/gunyah/vm_mgr.h
>> @@ -7,14 +7,40 @@
>>   #define _GH_PRIV_VM_MGR_H
>>   #include <linux/gunyah_rsc_mgr.h>
>> +#include <linux/list.h>
>> +#include <linux/miscdevice.h>
>> +#include <linux/mutex.h>
>>   #include <uapi/linux/gunyah.h>
>>   long gh_dev_vm_mgr_ioctl(struct gh_rm *rm, unsigned int cmd, 
>> unsigned long arg);
>> +enum gunyah_vm_mem_share_type {
>> +    VM_MEM_SHARE,
>> +    VM_MEM_LEND,
>> +};
>> +
>> +struct gunyah_vm_memory_mapping {
>> +    struct list_head list;
>> +    enum gunyah_vm_mem_share_type share_type;
>> +    struct gh_rm_mem_parcel parcel;
>> +
>> +    __u64 guest_phys_addr;
>> +    struct page **pages;
>> +    unsigned long npages;
>> +};
>> +
>>   struct gunyah_vm {
>>       u16 vmid;
>> -    struct gh_rm_rpc *rm;
>> +    struct gh_rm *rm;
>> +
>> +    struct mutex mm_lock;
>> +    struct list_head memory_mappings;
>>   };
>> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct 
>> gunyah_vm *ghvm,
>> +                            struct gh_userspace_memory_region *region);
>> +void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct 
>> gunyah_vm_memory_mapping *mapping);
>> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct 
>> gunyah_vm *ghvm, u32 label);
>> +
>>   #endif
>> diff --git a/drivers/virt/gunyah/vm_mgr_mm.c 
>> b/drivers/virt/gunyah/vm_mgr_mm.c
>> new file mode 100644
>> index 000000000000..f2dbdb4ee8ab
>> --- /dev/null
>> +++ b/drivers/virt/gunyah/vm_mgr_mm.c
>> @@ -0,0 +1,223 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All 
>> rights reserved.
>> + */
>> +
>> +#define pr_fmt(fmt) "gh_vm_mgr: " fmt
>> +
>> +#include <linux/gunyah_rsc_mgr.h>
>> +#include <linux/mm.h>
>> +
>> +#include <uapi/linux/gunyah.h>
>> +
>> +#include "vm_mgr.h"
>> +
>> +static inline bool page_contiguous(phys_addr_t p, phys_addr_t t)
>> +{
>> +    return t - p == PAGE_SIZE;
>> +}
>> +
>> +static struct gunyah_vm_memory_mapping 
>> *__gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
>> +{
>> +    struct gunyah_vm_memory_mapping *mapping;
>> +
>> +
> only one line.

Done.

>> +    list_for_each_entry(mapping, &ghvm->memory_mappings, list)
>> +        if (mapping->parcel.label == label)
>> +            return mapping;
>> +
>> +    return NULL;
>> +}
>> +
>> +void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct 
>> gunyah_vm_memory_mapping *mapping)
>> +{
>> +    int i, ret = 0;
>> +
>> +    if (mapping->parcel.mem_handle != GH_MEM_HANDLE_INVAL) {
>> +        ret = gh_rm_mem_reclaim(ghvm->rm, &mapping->parcel);
>> +        if (ret)
>> +            pr_warn("Failed to reclaim memory parcel for label %d: 
>> %d\n",
>> +                mapping->parcel.label, ret);
>> +    }
>> +
>> +    if (!ret)
>> +        for (i = 0; i < mapping->npages; i++)
>> +            unpin_user_page(mapping->pages[i]);
>> +
>> +    kfree(mapping->pages);
>> +    kfree(mapping->parcel.acl_entries);
>> +    kfree(mapping->parcel.mem_entries);
>> +
>> +    mutex_lock(&ghvm->mm_lock);
>> +    list_del(&mapping->list);
>> +    mutex_unlock(&ghvm->mm_lock);
>> +}
>> +
>> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct 
>> gunyah_vm *ghvm, u32 label)
>> +{
>> +    struct gunyah_vm_memory_mapping *mapping;
>> +    int ret;
>> +
>> +    ret = mutex_lock_interruptible(&ghvm->mm_lock);
>> +    if (ret)
>> +        return ERR_PTR(ret);
>> +    mapping = __gh_vm_mem_mapping_find(ghvm, label);
>> +    mutex_unlock(&ghvm->mm_lock);
>> +    return mapping ? : ERR_PTR(-ENODEV);
>> +}
>> +
>> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct 
>> gunyah_vm *ghvm,
>> +                            struct gh_userspace_memory_region *region)
>> +{
> Is this a static functoin or an exported symbol?
> 

Neither, it's used in vm_mgr.c.

>> +    phys_addr_t curr_page, prev_page;
>> +    struct gunyah_vm_memory_mapping *mapping, *tmp_mapping;
>> +    struct gh_rm_mem_entry *mem_entries;
>> +    int i, j, pinned, ret = 0;
>> +    struct gh_rm_mem_parcel *parcel;
>> +    size_t entry_size;
>> +    u16 vmid; > +
> Reverse christmas tree to sor local variables would be nice.
> 

Done, and checked throughout the series as well.

>> +    if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
>> +        !PAGE_ALIGNED(region->userspace_addr))
> Even this alignment needs some documentation.
> 

Documented now.

> Or why not just let the user only pass number of pages instead of size?
> 
> 

KVM, Nitro enclaves, and ACRN all give size directly and not as # of pages.

>> +        return ERR_PTR(-EINVAL);
>> +
>> +    if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
>> +        return ERR_PTR(-EOPNOTSUPP);
> 
> We should proabably move this as very first check while handling this 
> IOCTL.
> 

Done.

> 
>> +
>> +    ret = mutex_lock_interruptible(&ghvm->mm_lock);
>> +    if (ret)
>> +        return ERR_PTR(ret);
>> +    mapping = __gh_vm_mem_mapping_find(ghvm, region->label);
> 
> so label is unique and userspace proabably aware of this?
> Can we have more than one userspace doing this? and if so how can it 
> ensure that each label is unique?
> 

The label is unique and userspace is aware of this. One userspace owns 
the VM FD.

> 
>> +    if (mapping) {
>> +        mutex_unlock(&ghvm->mm_lock);
>> +        return ERR_PTR(-EEXIST);
>> +    }
>> +
>> +    mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
>> +    if (!mapping) {
>> +        ret = -ENOMEM;
>> +        goto unlock;
>> +    }
>> +
>> +    mapping->parcel.label = region->label;
> 
>> +    mapping->guest_phys_addr = region->guest_phys_addr;
>> +    mapping->npages = region->memory_size >> PAGE_SHIFT;
>> +    parcel = &mapping->parcel;
>> +    parcel->mem_handle = GH_MEM_HANDLE_INVAL; /* to be filled later 
>> by mem_share/mem_lend */
>> +    parcel->mem_type = GH_RM_MEM_TYPE_NORMAL;
>> +
>> +    /* Check for overlap */
>> +    list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
>> +        if (!((mapping->guest_phys_addr + (mapping->npages << 
>> PAGE_SHIFT) <=
>> +            tmp_mapping->guest_phys_addr) ||
>> +            (mapping->guest_phys_addr >=
>> +            tmp_mapping->guest_phys_addr + (tmp_mapping->npages << 
>> PAGE_SHIFT)))) {
>> +            ret = -EEXIST;
>> +            goto unlock;
>> +        }
>> +    }
> This looks like we will loop every mappign for each allocation giving us 
> an O(n), How frequent and how many max mappings can be there in the system?
> 

In all our use cases so far, only a few (max 3) mappings are used. 
Gunyah and Linux would be similarly bounded by heap-based memory 
constraints when adding more memory mappings if you don't hit U32_MAX first.

This could be reworked into a rb tree, but I thought it better to use a 
simpler list-based implementation since it was easier to review and 
benefits are not seen when only a few mappings used.

[snip]

>> --- a/include/uapi/linux/gunyah.h
>> +++ b/include/uapi/linux/gunyah.h
>> @@ -20,4 +20,26 @@
>>    */
>>   #define GH_CREATE_VM            _IO(GH_IOCTL_TYPE, 0x0) /* Returns a 
>> Gunyah VM fd */
>> +/*
>> + * ioctls for VM fds
>> + */
>> +struct gh_userspace_memory_region {
> 
> This struct needs some kernedoc.
> 

Done, as well for the other UAPI structs added later in the series.


>> +    __u32 label;
> 
>> +#define GH_MEM_ALLOW_READ    (1UL << 0)
>> +#define GH_MEM_ALLOW_WRITE    (1UL << 1)
>> +#define GH_MEM_ALLOW_EXEC    (1UL << 2)
>> +/*
>> + * The guest will be lent the memory instead of shared.
>> + * In other words, the guest has exclusive access to the memory 
>> region and the host loses access.
>> + */
>> +#define GH_MEM_LENT        (1UL << 3)
>> +    __u32 flags;
>> +    __u64 guest_phys_addr;
>> +    __u64 memory_size;
> if we are only expecting pages, this should probably be make explict by 
> using nr_pages instead of size
> 
>> +    __u64 userspace_addr;
>> +};
>> +
>> +#define GH_VM_SET_USER_MEM_REGION    _IOW(GH_IOCTL_TYPE, 0x1, \
>> +                        struct gh_userspace_memory_region)
>> +
>>   #endif
diff mbox series

Patch

diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
index 03951cf82023..ff8bc4925392 100644
--- a/drivers/virt/gunyah/Makefile
+++ b/drivers/virt/gunyah/Makefile
@@ -2,5 +2,5 @@ 
 
 obj-$(CONFIG_GUNYAH) += gunyah.o
 
-gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o
+gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
 obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
index 0864dbd77e28..b847fde63333 100644
--- a/drivers/virt/gunyah/vm_mgr.c
+++ b/drivers/virt/gunyah/vm_mgr.c
@@ -35,14 +35,55 @@  static __must_check struct gunyah_vm *gunyah_vm_alloc(struct gh_rm_rpc *rm)
 	ghvm->vmid = vmid;
 	ghvm->rm = rm;
 
+	mutex_init(&ghvm->mm_lock);
+	INIT_LIST_HEAD(&ghvm->memory_mappings);
+
 	return ghvm;
 }
 
 static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
+	struct gunyah_vm *ghvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
 	long r;
 
 	switch (cmd) {
+	case GH_VM_SET_USER_MEM_REGION: {
+		struct gunyah_vm_memory_mapping *mapping;
+		struct gh_userspace_memory_region region;
+
+		r = -EFAULT;
+		if (copy_from_user(&region, argp, sizeof(region)))
+			break;
+
+		r = -EINVAL;
+		/* All other flag bits are reserved for future use */
+		if (region.flags & ~(GH_MEM_ALLOW_READ | GH_MEM_ALLOW_WRITE | GH_MEM_ALLOW_EXEC |
+			GH_MEM_LENT))
+			break;
+
+
+		if (region.memory_size) {
+			r = 0;
+			mapping = gh_vm_mem_mapping_alloc(ghvm, &region);
+			if (IS_ERR(mapping)) {
+				r = PTR_ERR(mapping);
+				break;
+			}
+		} else {
+			mapping = gh_vm_mem_mapping_find(ghvm, region.label);
+			if (IS_ERR(mapping)) {
+				r = PTR_ERR(mapping);
+				break;
+			}
+			r = 0;
+			if (!mapping)
+				break;
+			gh_vm_mem_mapping_reclaim(ghvm, mapping);
+			kfree(mapping);
+		}
+		break;
+	}
 	default:
 		r = -ENOTTY;
 		break;
@@ -54,7 +95,12 @@  static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 static int gh_vm_release(struct inode *inode, struct file *filp)
 {
 	struct gunyah_vm *ghvm = filp->private_data;
+	struct gunyah_vm_memory_mapping *mapping, *tmp;
 
+	list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
+		gh_vm_mem_mapping_reclaim(ghvm, mapping);
+		kfree(mapping);
+	}
 	put_gh_rm(ghvm->rm);
 	kfree(ghvm);
 	return 0;
diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
index e47f34de7f9e..6b38bf780f76 100644
--- a/drivers/virt/gunyah/vm_mgr.h
+++ b/drivers/virt/gunyah/vm_mgr.h
@@ -7,14 +7,40 @@ 
 #define _GH_PRIV_VM_MGR_H
 
 #include <linux/gunyah_rsc_mgr.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
 
 #include <uapi/linux/gunyah.h>
 
 long gh_dev_vm_mgr_ioctl(struct gh_rm *rm, unsigned int cmd, unsigned long arg);
 
+enum gunyah_vm_mem_share_type {
+	VM_MEM_SHARE,
+	VM_MEM_LEND,
+};
+
+struct gunyah_vm_memory_mapping {
+	struct list_head list;
+	enum gunyah_vm_mem_share_type share_type;
+	struct gh_rm_mem_parcel parcel;
+
+	__u64 guest_phys_addr;
+	struct page **pages;
+	unsigned long npages;
+};
+
 struct gunyah_vm {
 	u16 vmid;
-	struct gh_rm_rpc *rm;
+	struct gh_rm *rm;
+
+	struct mutex mm_lock;
+	struct list_head memory_mappings;
 };
 
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
+							struct gh_userspace_memory_region *region);
+void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping);
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label);
+
 #endif
diff --git a/drivers/virt/gunyah/vm_mgr_mm.c b/drivers/virt/gunyah/vm_mgr_mm.c
new file mode 100644
index 000000000000..f2dbdb4ee8ab
--- /dev/null
+++ b/drivers/virt/gunyah/vm_mgr_mm.c
@@ -0,0 +1,223 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#define pr_fmt(fmt) "gh_vm_mgr: " fmt
+
+#include <linux/gunyah_rsc_mgr.h>
+#include <linux/mm.h>
+
+#include <uapi/linux/gunyah.h>
+
+#include "vm_mgr.h"
+
+static inline bool page_contiguous(phys_addr_t p, phys_addr_t t)
+{
+	return t - p == PAGE_SIZE;
+}
+
+static struct gunyah_vm_memory_mapping *__gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
+{
+	struct gunyah_vm_memory_mapping *mapping;
+
+
+	list_for_each_entry(mapping, &ghvm->memory_mappings, list)
+		if (mapping->parcel.label == label)
+			return mapping;
+
+	return NULL;
+}
+
+void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping)
+{
+	int i, ret = 0;
+
+	if (mapping->parcel.mem_handle != GH_MEM_HANDLE_INVAL) {
+		ret = gh_rm_mem_reclaim(ghvm->rm, &mapping->parcel);
+		if (ret)
+			pr_warn("Failed to reclaim memory parcel for label %d: %d\n",
+				mapping->parcel.label, ret);
+	}
+
+	if (!ret)
+		for (i = 0; i < mapping->npages; i++)
+			unpin_user_page(mapping->pages[i]);
+
+	kfree(mapping->pages);
+	kfree(mapping->parcel.acl_entries);
+	kfree(mapping->parcel.mem_entries);
+
+	mutex_lock(&ghvm->mm_lock);
+	list_del(&mapping->list);
+	mutex_unlock(&ghvm->mm_lock);
+}
+
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
+{
+	struct gunyah_vm_memory_mapping *mapping;
+	int ret;
+
+	ret = mutex_lock_interruptible(&ghvm->mm_lock);
+	if (ret)
+		return ERR_PTR(ret);
+	mapping = __gh_vm_mem_mapping_find(ghvm, label);
+	mutex_unlock(&ghvm->mm_lock);
+	return mapping ? : ERR_PTR(-ENODEV);
+}
+
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
+							struct gh_userspace_memory_region *region)
+{
+	phys_addr_t curr_page, prev_page;
+	struct gunyah_vm_memory_mapping *mapping, *tmp_mapping;
+	struct gh_rm_mem_entry *mem_entries;
+	int i, j, pinned, ret = 0;
+	struct gh_rm_mem_parcel *parcel;
+	size_t entry_size;
+	u16 vmid;
+
+	if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
+		!PAGE_ALIGNED(region->userspace_addr))
+		return ERR_PTR(-EINVAL);
+
+	if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	ret = mutex_lock_interruptible(&ghvm->mm_lock);
+	if (ret)
+		return ERR_PTR(ret);
+	mapping = __gh_vm_mem_mapping_find(ghvm, region->label);
+	if (mapping) {
+		mutex_unlock(&ghvm->mm_lock);
+		return ERR_PTR(-EEXIST);
+	}
+
+	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
+	if (!mapping) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	mapping->parcel.label = region->label;
+	mapping->guest_phys_addr = region->guest_phys_addr;
+	mapping->npages = region->memory_size >> PAGE_SHIFT;
+	parcel = &mapping->parcel;
+	parcel->mem_handle = GH_MEM_HANDLE_INVAL; /* to be filled later by mem_share/mem_lend */
+	parcel->mem_type = GH_RM_MEM_TYPE_NORMAL;
+
+	/* Check for overlap */
+	list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
+		if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
+			tmp_mapping->guest_phys_addr) ||
+			(mapping->guest_phys_addr >=
+			tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
+			ret = -EEXIST;
+			goto unlock;
+		}
+	}
+
+	list_add(&mapping->list, &ghvm->memory_mappings);
+unlock:
+	mutex_unlock(&ghvm->mm_lock);
+	if (ret)
+		goto free_mapping;
+
+	mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
+	if (!mapping->pages) {
+		ret = -ENOMEM;
+		goto reclaim;
+	}
+
+	pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
+					FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
+	if (pinned < 0) {
+		ret = pinned;
+		goto reclaim;
+	} else if (pinned != mapping->npages) {
+		ret = -EFAULT;
+		mapping->npages = pinned; /* update npages for reclaim */
+		goto reclaim;
+	}
+
+	if (region->flags & GH_MEM_LENT) {
+		parcel->n_acl_entries = 1;
+		mapping->share_type = VM_MEM_LEND;
+	} else {
+		parcel->n_acl_entries = 2;
+		mapping->share_type = VM_MEM_SHARE;
+	}
+	parcel->acl_entries = kcalloc(parcel->n_acl_entries,
+						sizeof(*parcel->acl_entries),
+						GFP_KERNEL);
+	if (!parcel->acl_entries) {
+		ret = -ENOMEM;
+		goto reclaim;
+	}
+
+	parcel->acl_entries[0].vmid = cpu_to_le16(ghvm->vmid);
+	if (region->flags & GH_MEM_ALLOW_READ)
+		parcel->acl_entries[0].perms |= GH_RM_ACL_R;
+	if (region->flags & GH_MEM_ALLOW_WRITE)
+		parcel->acl_entries[0].perms |= GH_RM_ACL_W;
+	if (region->flags & GH_MEM_ALLOW_EXEC)
+		parcel->acl_entries[0].perms |= GH_RM_ACL_X;
+
+	if (mapping->share_type == VM_MEM_SHARE) {
+		ret = gh_rm_get_vmid(ghvm->rm, &vmid);
+		if (ret) {
+			if (ret > 0) {
+				pr_warn("RM failed to get this VM's VMID: %d", ret);
+				ret = -EINVAL;
+			}
+			goto reclaim;
+		}
+
+		parcel->acl_entries[1].vmid = cpu_to_le16(vmid);
+		/* Host assumed to have all these permissions. Gunyah will not
+		 * grant new permissions if host actually had less than RWX
+		 */
+		parcel->acl_entries[1].perms |= GH_RM_ACL_R | GH_RM_ACL_W | GH_RM_ACL_X;
+	}
+
+	mem_entries = kcalloc(mapping->npages, sizeof(*mem_entries), GFP_KERNEL);
+	if (!mem_entries) {
+		ret = -ENOMEM;
+		goto reclaim;
+	}
+
+	/* reduce number of entries by combining contiguous pages into single memory entry */
+	prev_page = page_to_phys(mapping->pages[0]);
+	mem_entries[0].ipa_base = cpu_to_le64(prev_page);
+	entry_size = PAGE_SIZE;
+	for (i = 1, j = 0; i < mapping->npages; i++) {
+		curr_page = page_to_phys(mapping->pages[i]);
+		if (page_contiguous(prev_page, curr_page)) {
+			entry_size += PAGE_SIZE;
+		} else {
+			mem_entries[j].size = cpu_to_le64(entry_size);
+			j++;
+			mem_entries[j].ipa_base = cpu_to_le64(curr_page);
+			entry_size = PAGE_SIZE;
+		}
+
+		prev_page = curr_page;
+	}
+	mem_entries[j].size = cpu_to_le64(entry_size);
+
+	parcel->n_mem_entries = j + 1;
+	parcel->mem_entries = kmemdup(mem_entries, sizeof(*mem_entries) * parcel->n_mem_entries,
+					GFP_KERNEL);
+	kfree(mem_entries);
+	if (!parcel->mem_entries) {
+		ret = -ENOMEM;
+		goto reclaim;
+	}
+
+	return mapping;
+reclaim:
+	gh_vm_mem_mapping_reclaim(ghvm, mapping);
+free_mapping:
+	kfree(mapping);
+	return ERR_PTR(ret);
+}
diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
index 88a40d6e0b96..574f33b198d0 100644
--- a/include/uapi/linux/gunyah.h
+++ b/include/uapi/linux/gunyah.h
@@ -20,4 +20,26 @@ 
  */
 #define GH_CREATE_VM			_IO(GH_IOCTL_TYPE, 0x0) /* Returns a Gunyah VM fd */
 
+/*
+ * ioctls for VM fds
+ */
+struct gh_userspace_memory_region {
+	__u32 label;
+#define GH_MEM_ALLOW_READ	(1UL << 0)
+#define GH_MEM_ALLOW_WRITE	(1UL << 1)
+#define GH_MEM_ALLOW_EXEC	(1UL << 2)
+/*
+ * The guest will be lent the memory instead of shared.
+ * In other words, the guest has exclusive access to the memory region and the host loses access.
+ */
+#define GH_MEM_LENT		(1UL << 3)
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size;
+	__u64 userspace_addr;
+};
+
+#define GH_VM_SET_USER_MEM_REGION	_IOW(GH_IOCTL_TYPE, 0x1, \
+						struct gh_userspace_memory_region)
+
 #endif