Message ID | 20230304010632.2127470-14-quic_eberman@quicinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Drivers for gunyah hypervisor | expand |
On 04/03/2023 01:06, Elliot Berman wrote: > Add remaining ioctls to support non-proxy VM boot: > > - Gunyah Resource Manager uses the VM's devicetree to configure the > virtual machine. The location of the devicetree in the guest's > virtual memory can be declared via the SET_DTB_CONFIG ioctl. > - Trigger start of the virtual machine with VM_START ioctl. > > Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com> > Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com> > Signed-off-by: Elliot Berman <quic_eberman@quicinc.com> > --- > drivers/virt/gunyah/vm_mgr.c | 243 ++++++++++++++++++++++++++++++-- > drivers/virt/gunyah/vm_mgr.h | 10 ++ > drivers/virt/gunyah/vm_mgr_mm.c | 23 +++ > include/linux/gunyah_rsc_mgr.h | 6 + > include/uapi/linux/gunyah.h | 13 ++ > 5 files changed, 282 insertions(+), 13 deletions(-) > ... > diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h > index a19207e3e065..d6abd8605a2e 100644 > --- a/include/uapi/linux/gunyah.h > +++ b/include/uapi/linux/gunyah.h > @@ -49,4 +49,17 @@ struct gh_userspace_memory_region { > #define GH_VM_SET_USER_MEM_REGION _IOW(GH_IOCTL_TYPE, 0x1, \ > struct gh_userspace_memory_region) > > +/** > + * struct gh_vm_dtb_config - Set the location of the VM's devicetree blob > + * @guest_phys_addr: Address of the VM's devicetree in guest memory. > + * @size: Maximum size of the devicetree. > + */ > +struct gh_vm_dtb_config { > + __u64 guest_phys_addr; > + __u64 size; > +}; > +#define GH_VM_SET_DTB_CONFIG _IOW(GH_IOCTL_TYPE, 0x2, struct gh_vm_dtb_config) > + > +#define GH_VM_START _IO(GH_IOCTL_TYPE, 0x3) A comment here that this is going to *ONLY* start an un-authenticated VM would be useful to the users. with that fixed, Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org> --srini > + > #endif
On 3/3/23 7:06 PM, Elliot Berman wrote: > Add remaining ioctls to support non-proxy VM boot: > > - Gunyah Resource Manager uses the VM's devicetree to configure the > virtual machine. The location of the devicetree in the guest's > virtual memory can be declared via the SET_DTB_CONFIG ioctl. > - Trigger start of the virtual machine with VM_START ioctl. > > Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com> > Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com> > Signed-off-by: Elliot Berman <quic_eberman@quicinc.com> I identify one bug here, possibly another. And I have a few suggestions about things that could improve code readability. -Alex > --- > drivers/virt/gunyah/vm_mgr.c | 243 ++++++++++++++++++++++++++++++-- > drivers/virt/gunyah/vm_mgr.h | 10 ++ > drivers/virt/gunyah/vm_mgr_mm.c | 23 +++ > include/linux/gunyah_rsc_mgr.h | 6 + > include/uapi/linux/gunyah.h | 13 ++ > 5 files changed, 282 insertions(+), 13 deletions(-) > > diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c > index e950274c6a53..299b9bb81edc 100644 > --- a/drivers/virt/gunyah/vm_mgr.c > +++ b/drivers/virt/gunyah/vm_mgr.c > @@ -9,37 +9,118 @@ > #include <linux/file.h> > #include <linux/gunyah_rsc_mgr.h> > #include <linux/miscdevice.h> > +#include <linux/mm.h> > #include <linux/module.h> > > #include <uapi/linux/gunyah.h> > > #include "vm_mgr.h" > > +static int gh_vm_rm_notification_status(struct gh_vm *ghvm, void *data) > +{ > + struct gh_rm_vm_status_payload *payload = data; > + > + if (payload->vmid != ghvm->vmid) > + return NOTIFY_OK; > + > + /* All other state transitions are synchronous to a corresponding RM call */ > + if (payload->vm_status == GH_RM_VM_STATUS_RESET) { > + down_write(&ghvm->status_lock); > + ghvm->vm_status = payload->vm_status; > + up_write(&ghvm->status_lock); > + wake_up(&ghvm->vm_status_wait); > + } > + > + return NOTIFY_DONE; > +} > + > +static int gh_vm_rm_notification_exited(struct gh_vm *ghvm, void *data) > +{ > + struct gh_rm_vm_exited_payload *payload = data; > + > + if (payload->vmid != ghvm->vmid) > + return NOTIFY_OK; > + > + down_write(&ghvm->status_lock); > + ghvm->vm_status = GH_RM_VM_STATUS_EXITED; > + up_write(&ghvm->status_lock); > + > + return NOTIFY_DONE; > +} > + > +static int gh_vm_rm_notification(struct notifier_block *nb, unsigned long action, void *data) > +{ > + struct gh_vm *ghvm = container_of(nb, struct gh_vm, nb); > + > + switch (action) { > + case GH_RM_NOTIFICATION_VM_STATUS: > + return gh_vm_rm_notification_status(ghvm, data); > + case GH_RM_NOTIFICATION_VM_EXITED: > + return gh_vm_rm_notification_exited(ghvm, data); > + default: > + return NOTIFY_OK; > + } > +} > + > +static void gh_vm_stop(struct gh_vm *ghvm) > +{ > + int ret; > + > + down_write(&ghvm->status_lock); > + if (ghvm->vm_status == GH_RM_VM_STATUS_RUNNING) { > + ret = gh_rm_vm_stop(ghvm->rm, ghvm->vmid); > + if (ret) > + dev_warn(ghvm->parent, "Failed to stop VM: %d\n", ret); > + } > + > + ghvm->vm_status = GH_RM_VM_STATUS_EXITED; > + up_write(&ghvm->status_lock); > +} > + > static void gh_vm_free(struct work_struct *work) > { > struct gh_vm *ghvm = container_of(work, struct gh_vm, free_work); > struct gh_vm_mem *mapping, *tmp; > int ret; > > - mutex_lock(&ghvm->mm_lock); > - list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) { > - gh_vm_mem_reclaim(ghvm, mapping); > - kfree(mapping); > - } > - mutex_unlock(&ghvm->mm_lock); > - > - ret = gh_rm_dealloc_vmid(ghvm->rm, ghvm->vmid); > - if (ret) > - pr_warn("Failed to deallocate vmid: %d\n", ret); > + switch (ghvm->vm_status) { > + case GH_RM_VM_STATUS_RUNNING: > + gh_vm_stop(ghvm); > + fallthrough; > + case GH_RM_VM_STATUS_INIT_FAILED: > + case GH_RM_VM_STATUS_LOAD: > + case GH_RM_VM_STATUS_EXITED: > + mutex_lock(&ghvm->mm_lock); > + list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) { > + gh_vm_mem_reclaim(ghvm, mapping); > + kfree(mapping); > + } > + mutex_unlock(&ghvm->mm_lock); > + fallthrough; > + case GH_RM_VM_STATUS_NO_STATE: > + ret = gh_rm_dealloc_vmid(ghvm->rm, ghvm->vmid); > + if (ret) > + dev_warn(ghvm->parent, "Failed to deallocate vmid: %d\n", ret); > + > + gh_rm_notifier_unregister(ghvm->rm, &ghvm->nb); I think you should unregister the notifier before you deallocate the VMID. I think the notifier might be able to use the VMID. > + gh_rm_put(ghvm->rm); > + kfree(ghvm); > + break; > + default: > + dev_err(ghvm->parent, "VM is unknown state: %d. VM will not be cleaned up.\n", > + ghvm->vm_status); > > - put_gh_rm(ghvm->rm); > - kfree(ghvm); > + gh_rm_notifier_unregister(ghvm->rm, &ghvm->nb); > + gh_rm_put(ghvm->rm); > + kfree(ghvm); > + break; > + } > } > > static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm) > { > struct gh_vm *ghvm; > - int vmid; > + int vmid, ret; > > vmid = gh_rm_alloc_vmid(rm, 0); > if (vmid < 0) > @@ -55,13 +136,130 @@ static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm) > ghvm->vmid = vmid; > ghvm->rm = rm; > > + init_waitqueue_head(&ghvm->vm_status_wait); > + ghvm->nb.notifier_call = gh_vm_rm_notification; > + ret = gh_rm_notifier_register(rm, &ghvm->nb); > + if (ret) { > + gh_rm_put(rm); > + gh_rm_dealloc_vmid(rm, vmid); > + kfree(ghvm); > + return ERR_PTR(ret); > + } > + > mutex_init(&ghvm->mm_lock); > INIT_LIST_HEAD(&ghvm->memory_mappings); > + init_rwsem(&ghvm->status_lock); > INIT_WORK(&ghvm->free_work, gh_vm_free); > + ghvm->vm_status = GH_RM_VM_STATUS_LOAD; > > return ghvm; > } > > +static int gh_vm_start(struct gh_vm *ghvm) > +{ > + struct gh_vm_mem *mapping; > + u64 dtb_offset; > + u32 mem_handle; > + int ret; > + > + down_write(&ghvm->status_lock); > + if (ghvm->vm_status != GH_RM_VM_STATUS_LOAD) { > + up_write(&ghvm->status_lock); > + return 0; > + } > + > + ghvm->vm_status = GH_RM_VM_STATUS_RESET; > + > + mutex_lock(&ghvm->mm_lock); > + list_for_each_entry(mapping, &ghvm->memory_mappings, list) { > + switch (mapping->share_type) { > + case VM_MEM_LEND: > + ret = gh_rm_mem_lend(ghvm->rm, &mapping->parcel); > + break; > + case VM_MEM_SHARE: > + ret = gh_rm_mem_share(ghvm->rm, &mapping->parcel); > + break; > + } > + if (ret) { > + dev_warn(ghvm->parent, "Failed to %s parcel %d: %d\n", > + mapping->share_type == VM_MEM_LEND ? "lend" : "share", > + mapping->parcel.label, > + ret); > + goto err; > + } > + } > + mutex_unlock(&ghvm->mm_lock); > + > + mapping = gh_vm_mem_find_by_addr(ghvm, ghvm->dtb_config.guest_phys_addr, > + ghvm->dtb_config.size); > + if (!mapping) { > + dev_warn(ghvm->parent, "Failed to find the memory_handle for DTB\n"); > + ret = -EINVAL; > + goto err; > + } > + > + mem_handle = mapping->parcel.mem_handle; > + dtb_offset = ghvm->dtb_config.guest_phys_addr - mapping->guest_phys_addr; > + > + ret = gh_rm_vm_configure(ghvm->rm, ghvm->vmid, ghvm->auth, mem_handle, > + 0, 0, dtb_offset, ghvm->dtb_config.size); > + if (ret) { > + dev_warn(ghvm->parent, "Failed to configure VM: %d\n", ret); > + goto err; > + } > + > + ret = gh_rm_vm_init(ghvm->rm, ghvm->vmid); > + if (ret) { > + dev_warn(ghvm->parent, "Failed to initialize VM: %d\n", ret); > + goto err; > + } > + > + ret = gh_rm_vm_start(ghvm->rm, ghvm->vmid); > + if (ret) { > + dev_warn(ghvm->parent, "Failed to start VM: %d\n", ret); > + goto err; > + } > + > + ghvm->vm_status = GH_RM_VM_STATUS_RUNNING; > + up_write(&ghvm->status_lock); > + return ret; > +err: > + ghvm->vm_status = GH_RM_VM_STATUS_INIT_FAILED; > + /* gh_vm_free will handle releasing resources and reclaiming memory */ > + up_write(&ghvm->status_lock); > + return ret; > +} > + > +static int gh_vm_ensure_started(struct gh_vm *ghvm) > +{ > + int ret; > + > + ret = down_read_interruptible(&ghvm->status_lock); > + if (ret) > + return ret; > + > + /* Unlikely because VM is typically started */ > + if (unlikely(ghvm->vm_status == GH_RM_VM_STATUS_LOAD)) { > + up_read(&ghvm->status_lock); > + ret = gh_vm_start(ghvm); > + if (ret) > + goto out; You have already released the status lock at this point. So going to "out" will do it again. This is a BUG. I think you just want to return ret, and you don't need the "out" error path. > + /** gh_vm_start() is guaranteed to bring status out of > + * GH_RM_VM_STATUS_LOAD, thus inifitely recursive call is not > + * possible > + */ > + return gh_vm_ensure_started(ghvm); > + } > + > + /* Unlikely because VM is typically running */ > + if (unlikely(ghvm->vm_status != GH_RM_VM_STATUS_RUNNING)) > + ret = -ENODEV; > + > +out: > + up_read(&ghvm->status_lock); > + return ret; > +} > + > static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > { > struct gh_vm *ghvm = filp->private_data; > @@ -85,6 +283,25 @@ static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > r = gh_vm_mem_alloc(ghvm, ®ion); > break; > } > + case GH_VM_SET_DTB_CONFIG: { > + struct gh_vm_dtb_config dtb_config; > + > + if (copy_from_user(&dtb_config, argp, sizeof(dtb_config))) > + return -EFAULT; > + It's clear that the base of the DTB does not need to be page aligned. But why do you round up the size to a page boundary? (It might be the "extra for overlay" I comment on elsewhere, but even if so, it's worth mentioning this.) > + dtb_config.size = PAGE_ALIGN(dtb_config.size); > + if (dtb_config.guest_phys_addr + dtb_config.size < dtb_config.guest_phys_addr) > + return -EOVERFLOW; > + > + ghvm->dtb_config = dtb_config; > + > + r = 0; > + break; > + } > + case GH_VM_START: { > + r = gh_vm_ensure_started(ghvm); > + break; > + } > default: > r = -ENOTTY; > break; > diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h > index c9f6fa5478ed..26bcc2ae4478 100644 > --- a/drivers/virt/gunyah/vm_mgr.h > +++ b/drivers/virt/gunyah/vm_mgr.h > @@ -10,6 +10,8 @@ > #include <linux/list.h> > #include <linux/miscdevice.h> > #include <linux/mutex.h> > +#include <linux/rwsem.h> > +#include <linux/wait.h> > > #include <uapi/linux/gunyah.h> > > @@ -34,6 +36,13 @@ struct gh_vm { > u16 vmid; > struct gh_rm *rm; > struct device *parent; > + enum gh_rm_vm_auth_mechanism auth; > + struct gh_vm_dtb_config dtb_config; > + > + struct notifier_block nb; > + enum gh_rm_vm_status vm_status; > + wait_queue_head_t vm_status_wait; > + struct rw_semaphore status_lock; > > struct work_struct free_work; > struct mutex mm_lock; > @@ -44,5 +53,6 @@ int gh_vm_mem_alloc(struct gh_vm *ghvm, struct gh_userspace_memory_region *regio > void gh_vm_mem_reclaim(struct gh_vm *ghvm, struct gh_vm_mem *mapping); > int gh_vm_mem_free(struct gh_vm *ghvm, u32 label); > struct gh_vm_mem *gh_vm_mem_find_by_label(struct gh_vm *ghvm, u32 label); > +struct gh_vm_mem *gh_vm_mem_find_by_addr(struct gh_vm *ghvm, u64 guest_phys_addr, u32 size); > > #endif > diff --git a/drivers/virt/gunyah/vm_mgr_mm.c b/drivers/virt/gunyah/vm_mgr_mm.c > index db6f55cef37f..6e1d2e8bddb7 100644 > --- a/drivers/virt/gunyah/vm_mgr_mm.c > +++ b/drivers/virt/gunyah/vm_mgr_mm.c > @@ -47,6 +47,29 @@ void gh_vm_mem_reclaim(struct gh_vm *ghvm, struct gh_vm_mem *mapping) > list_del(&mapping->list); > } > I think you should call this gh_vm_mem_find_mapping(). You are finding the mapping that contains the given range. > +struct gh_vm_mem *gh_vm_mem_find_by_addr(struct gh_vm *ghvm, u64 guest_phys_addr, u32 size) > +{ > + struct gh_vm_mem *mapping = NULL; > + int ret; > + > + ret = mutex_lock_interruptible(&ghvm->mm_lock); > + if (ret) > + return ERR_PTR(ret); > + I think you could slightly modify this, and replace this loop and the one in gh_vm_mem_alloc() with a call to a helper function. What I suggest is that you define a function like __gh_vm_mem_overlap(ghvm, offset, size). It would return a gh_vm_mem pointer if it found any mapping that overlapped the range, or null if none is found. Here, you could then check the returned result to ensure the entire range fits within it. And in gh_vm_mem_alloc() you could simply use the result to indicate that you can't allocate the range (because at least one existing range covers all or part of the new one). > + list_for_each_entry(mapping, &ghvm->memory_mappings, list) { > + if (guest_phys_addr >= mapping->guest_phys_addr && > + (guest_phys_addr + size <= mapping->guest_phys_addr + > + (mapping->npages << PAGE_SHIFT))) { > + goto unlock; > + } > + } > + > + mapping = NULL; > +unlock: > + mutex_unlock(&ghvm->mm_lock); > + return mapping; > +} > + > struct gh_vm_mem *gh_vm_mem_find_by_label(struct gh_vm *ghvm, u32 label) > { > struct gh_vm_mem *mapping; > diff --git a/include/linux/gunyah_rsc_mgr.h b/include/linux/gunyah_rsc_mgr.h > index 88a429dad09e..8b0b46f28e39 100644 > --- a/include/linux/gunyah_rsc_mgr.h > +++ b/include/linux/gunyah_rsc_mgr.h > @@ -29,6 +29,12 @@ struct gh_rm_vm_exited_payload { > #define GH_RM_NOTIFICATION_VM_EXITED 0x56100001 > > enum gh_rm_vm_status { > + /** > + * RM doesn't have a state where load partially failed because > + * only Linux I have no idea what the comment above means... Please fix. Several of the values below are never explicitly assigned, and some are used but not assigned. The others apparently might come back from the resource manager? Why, for example, are the PAUSED, AUTH, and RESETTING statuses defined if we don't use them? > + */ > + GH_RM_VM_STATUS_LOAD_FAILED = -1, > + > GH_RM_VM_STATUS_NO_STATE = 0, > GH_RM_VM_STATUS_INIT = 1, > GH_RM_VM_STATUS_READY = 2, > diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h > index a19207e3e065..d6abd8605a2e 100644 > --- a/include/uapi/linux/gunyah.h > +++ b/include/uapi/linux/gunyah.h > @@ -49,4 +49,17 @@ struct gh_userspace_memory_region { > #define GH_VM_SET_USER_MEM_REGION _IOW(GH_IOCTL_TYPE, 0x1, \ > struct gh_userspace_memory_region) > > +/** > + * struct gh_vm_dtb_config - Set the location of the VM's devicetree blob > + * @guest_phys_addr: Address of the VM's devicetree in guest memory. > + * @size: Maximum size of the devicetree. > + */ > +struct gh_vm_dtb_config { > + __u64 guest_phys_addr; > + __u64 size; > +}; > +#define GH_VM_SET_DTB_CONFIG _IOW(GH_IOCTL_TYPE, 0x2, struct gh_vm_dtb_config) > + > +#define GH_VM_START _IO(GH_IOCTL_TYPE, 0x3) > + > #endif
On 3/21/2023 7:24 AM, Srinivas Kandagatla wrote: > On 04/03/2023 01:06, Elliot Berman wrote: >> + >> +#define GH_VM_START _IO(GH_IOCTL_TYPE, 0x3) > A comment here that this is going to *ONLY* start an un-authenticated VM > would be useful to the users. > There is only support for unauthenticated VM in the UAPI being proposed and I'd like to re-use GH_VM_START ioctl for other VM types as well. Is the comment really useful? I can easily see forgetting to remove the comment and then being more confusing once the other VM types get added. Thanks, Elliot
On 4/11/23 4:07 PM, Elliot Berman wrote: > > > On 3/21/2023 7:24 AM, Srinivas Kandagatla wrote: > >> On 04/03/2023 01:06, Elliot Berman wrote: >>> + >>> +#define GH_VM_START _IO(GH_IOCTL_TYPE, 0x3) >> A comment here that this is going to *ONLY* start an un-authenticated >> VM would be useful to the users. >> > > There is only support for unauthenticated VM in the UAPI being proposed > and I'd like to re-use GH_VM_START ioctl for other VM types as well. Is > the comment really useful? I can easily see forgetting to remove the > comment and then being more confusing once the other VM types get added. It's up to you. And in general, I think your responses to my comments have been fine--even when you just explain why you don't plan to implement my suggestion. Thank you. -Alex > > Thanks, > Elliot
On 3/31/2023 7:26 AM, Alex Elder wrote: > On 3/3/23 7:06 PM, Elliot Berman wrote: >> diff --git a/include/linux/gunyah_rsc_mgr.h >> b/include/linux/gunyah_rsc_mgr.h >> index 88a429dad09e..8b0b46f28e39 100644 >> --- a/include/linux/gunyah_rsc_mgr.h >> +++ b/include/linux/gunyah_rsc_mgr.h >> @@ -29,6 +29,12 @@ struct gh_rm_vm_exited_payload { >> #define GH_RM_NOTIFICATION_VM_EXITED 0x56100001 >> enum gh_rm_vm_status { >> + /** >> + * RM doesn't have a state where load partially failed because >> + * only Linux > > I have no idea what the comment above means... Please fix. > > Several of the values below are never explicitly assigned, > and some are used but not assigned. The others apparently > might come back from the resource manager? Why, for > example, are the PAUSED, AUTH, and RESETTING statuses > defined if we don't use them? > I ended up no longer needing VM_STATUS_LOAD_FAILED. The other status values are defined by Gunyah resource manager. RM will notify us about the state transitions. Some of the state transitions can be inferred by Linux directly. For instance, gh_rm_vm_init() will transition the VM from GH_RM_VM_STATUS_INIT to GH_RM_VM_STATUS_READY iff it returns successfully. There is one instance where we wait for VM to exit during the VM teardown as well. Thanks, Elliot >> + */ >> + GH_RM_VM_STATUS_LOAD_FAILED = -1, >> + >> GH_RM_VM_STATUS_NO_STATE = 0, >> GH_RM_VM_STATUS_INIT = 1, >> GH_RM_VM_STATUS_READY = 2, >> diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h >> index a19207e3e065..d6abd8605a2e 100644 >> --- a/include/uapi/linux/gunyah.h >> +++ b/include/uapi/linux/gunyah.h >> @@ -49,4 +49,17 @@ struct gh_userspace_memory_region { >> #define GH_VM_SET_USER_MEM_REGION _IOW(GH_IOCTL_TYPE, 0x1, \ >> struct gh_userspace_memory_region) >> +/** >> + * struct gh_vm_dtb_config - Set the location of the VM's devicetree >> blob >> + * @guest_phys_addr: Address of the VM's devicetree in guest memory. >> + * @size: Maximum size of the devicetree. >> + */ >> +struct gh_vm_dtb_config { >> + __u64 guest_phys_addr; >> + __u64 size; >> +}; >> +#define GH_VM_SET_DTB_CONFIG _IOW(GH_IOCTL_TYPE, 0x2, struct >> gh_vm_dtb_config) >> + >> +#define GH_VM_START _IO(GH_IOCTL_TYPE, 0x3) >> + >> #endif >
diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c index e950274c6a53..299b9bb81edc 100644 --- a/drivers/virt/gunyah/vm_mgr.c +++ b/drivers/virt/gunyah/vm_mgr.c @@ -9,37 +9,118 @@ #include <linux/file.h> #include <linux/gunyah_rsc_mgr.h> #include <linux/miscdevice.h> +#include <linux/mm.h> #include <linux/module.h> #include <uapi/linux/gunyah.h> #include "vm_mgr.h" +static int gh_vm_rm_notification_status(struct gh_vm *ghvm, void *data) +{ + struct gh_rm_vm_status_payload *payload = data; + + if (payload->vmid != ghvm->vmid) + return NOTIFY_OK; + + /* All other state transitions are synchronous to a corresponding RM call */ + if (payload->vm_status == GH_RM_VM_STATUS_RESET) { + down_write(&ghvm->status_lock); + ghvm->vm_status = payload->vm_status; + up_write(&ghvm->status_lock); + wake_up(&ghvm->vm_status_wait); + } + + return NOTIFY_DONE; +} + +static int gh_vm_rm_notification_exited(struct gh_vm *ghvm, void *data) +{ + struct gh_rm_vm_exited_payload *payload = data; + + if (payload->vmid != ghvm->vmid) + return NOTIFY_OK; + + down_write(&ghvm->status_lock); + ghvm->vm_status = GH_RM_VM_STATUS_EXITED; + up_write(&ghvm->status_lock); + + return NOTIFY_DONE; +} + +static int gh_vm_rm_notification(struct notifier_block *nb, unsigned long action, void *data) +{ + struct gh_vm *ghvm = container_of(nb, struct gh_vm, nb); + + switch (action) { + case GH_RM_NOTIFICATION_VM_STATUS: + return gh_vm_rm_notification_status(ghvm, data); + case GH_RM_NOTIFICATION_VM_EXITED: + return gh_vm_rm_notification_exited(ghvm, data); + default: + return NOTIFY_OK; + } +} + +static void gh_vm_stop(struct gh_vm *ghvm) +{ + int ret; + + down_write(&ghvm->status_lock); + if (ghvm->vm_status == GH_RM_VM_STATUS_RUNNING) { + ret = gh_rm_vm_stop(ghvm->rm, ghvm->vmid); + if (ret) + dev_warn(ghvm->parent, "Failed to stop VM: %d\n", ret); + } + + ghvm->vm_status = GH_RM_VM_STATUS_EXITED; + up_write(&ghvm->status_lock); +} + static void gh_vm_free(struct work_struct *work) { struct gh_vm *ghvm = container_of(work, struct gh_vm, free_work); struct gh_vm_mem *mapping, *tmp; int ret; - mutex_lock(&ghvm->mm_lock); - list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) { - gh_vm_mem_reclaim(ghvm, mapping); - kfree(mapping); - } - mutex_unlock(&ghvm->mm_lock); - - ret = gh_rm_dealloc_vmid(ghvm->rm, ghvm->vmid); - if (ret) - pr_warn("Failed to deallocate vmid: %d\n", ret); + switch (ghvm->vm_status) { + case GH_RM_VM_STATUS_RUNNING: + gh_vm_stop(ghvm); + fallthrough; + case GH_RM_VM_STATUS_INIT_FAILED: + case GH_RM_VM_STATUS_LOAD: + case GH_RM_VM_STATUS_EXITED: + mutex_lock(&ghvm->mm_lock); + list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) { + gh_vm_mem_reclaim(ghvm, mapping); + kfree(mapping); + } + mutex_unlock(&ghvm->mm_lock); + fallthrough; + case GH_RM_VM_STATUS_NO_STATE: + ret = gh_rm_dealloc_vmid(ghvm->rm, ghvm->vmid); + if (ret) + dev_warn(ghvm->parent, "Failed to deallocate vmid: %d\n", ret); + + gh_rm_notifier_unregister(ghvm->rm, &ghvm->nb); + gh_rm_put(ghvm->rm); + kfree(ghvm); + break; + default: + dev_err(ghvm->parent, "VM is unknown state: %d. VM will not be cleaned up.\n", + ghvm->vm_status); - put_gh_rm(ghvm->rm); - kfree(ghvm); + gh_rm_notifier_unregister(ghvm->rm, &ghvm->nb); + gh_rm_put(ghvm->rm); + kfree(ghvm); + break; + } } static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm) { struct gh_vm *ghvm; - int vmid; + int vmid, ret; vmid = gh_rm_alloc_vmid(rm, 0); if (vmid < 0) @@ -55,13 +136,130 @@ static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm) ghvm->vmid = vmid; ghvm->rm = rm; + init_waitqueue_head(&ghvm->vm_status_wait); + ghvm->nb.notifier_call = gh_vm_rm_notification; + ret = gh_rm_notifier_register(rm, &ghvm->nb); + if (ret) { + gh_rm_put(rm); + gh_rm_dealloc_vmid(rm, vmid); + kfree(ghvm); + return ERR_PTR(ret); + } + mutex_init(&ghvm->mm_lock); INIT_LIST_HEAD(&ghvm->memory_mappings); + init_rwsem(&ghvm->status_lock); INIT_WORK(&ghvm->free_work, gh_vm_free); + ghvm->vm_status = GH_RM_VM_STATUS_LOAD; return ghvm; } +static int gh_vm_start(struct gh_vm *ghvm) +{ + struct gh_vm_mem *mapping; + u64 dtb_offset; + u32 mem_handle; + int ret; + + down_write(&ghvm->status_lock); + if (ghvm->vm_status != GH_RM_VM_STATUS_LOAD) { + up_write(&ghvm->status_lock); + return 0; + } + + ghvm->vm_status = GH_RM_VM_STATUS_RESET; + + mutex_lock(&ghvm->mm_lock); + list_for_each_entry(mapping, &ghvm->memory_mappings, list) { + switch (mapping->share_type) { + case VM_MEM_LEND: + ret = gh_rm_mem_lend(ghvm->rm, &mapping->parcel); + break; + case VM_MEM_SHARE: + ret = gh_rm_mem_share(ghvm->rm, &mapping->parcel); + break; + } + if (ret) { + dev_warn(ghvm->parent, "Failed to %s parcel %d: %d\n", + mapping->share_type == VM_MEM_LEND ? "lend" : "share", + mapping->parcel.label, + ret); + goto err; + } + } + mutex_unlock(&ghvm->mm_lock); + + mapping = gh_vm_mem_find_by_addr(ghvm, ghvm->dtb_config.guest_phys_addr, + ghvm->dtb_config.size); + if (!mapping) { + dev_warn(ghvm->parent, "Failed to find the memory_handle for DTB\n"); + ret = -EINVAL; + goto err; + } + + mem_handle = mapping->parcel.mem_handle; + dtb_offset = ghvm->dtb_config.guest_phys_addr - mapping->guest_phys_addr; + + ret = gh_rm_vm_configure(ghvm->rm, ghvm->vmid, ghvm->auth, mem_handle, + 0, 0, dtb_offset, ghvm->dtb_config.size); + if (ret) { + dev_warn(ghvm->parent, "Failed to configure VM: %d\n", ret); + goto err; + } + + ret = gh_rm_vm_init(ghvm->rm, ghvm->vmid); + if (ret) { + dev_warn(ghvm->parent, "Failed to initialize VM: %d\n", ret); + goto err; + } + + ret = gh_rm_vm_start(ghvm->rm, ghvm->vmid); + if (ret) { + dev_warn(ghvm->parent, "Failed to start VM: %d\n", ret); + goto err; + } + + ghvm->vm_status = GH_RM_VM_STATUS_RUNNING; + up_write(&ghvm->status_lock); + return ret; +err: + ghvm->vm_status = GH_RM_VM_STATUS_INIT_FAILED; + /* gh_vm_free will handle releasing resources and reclaiming memory */ + up_write(&ghvm->status_lock); + return ret; +} + +static int gh_vm_ensure_started(struct gh_vm *ghvm) +{ + int ret; + + ret = down_read_interruptible(&ghvm->status_lock); + if (ret) + return ret; + + /* Unlikely because VM is typically started */ + if (unlikely(ghvm->vm_status == GH_RM_VM_STATUS_LOAD)) { + up_read(&ghvm->status_lock); + ret = gh_vm_start(ghvm); + if (ret) + goto out; + /** gh_vm_start() is guaranteed to bring status out of + * GH_RM_VM_STATUS_LOAD, thus inifitely recursive call is not + * possible + */ + return gh_vm_ensure_started(ghvm); + } + + /* Unlikely because VM is typically running */ + if (unlikely(ghvm->vm_status != GH_RM_VM_STATUS_RUNNING)) + ret = -ENODEV; + +out: + up_read(&ghvm->status_lock); + return ret; +} + static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct gh_vm *ghvm = filp->private_data; @@ -85,6 +283,25 @@ static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) r = gh_vm_mem_alloc(ghvm, ®ion); break; } + case GH_VM_SET_DTB_CONFIG: { + struct gh_vm_dtb_config dtb_config; + + if (copy_from_user(&dtb_config, argp, sizeof(dtb_config))) + return -EFAULT; + + dtb_config.size = PAGE_ALIGN(dtb_config.size); + if (dtb_config.guest_phys_addr + dtb_config.size < dtb_config.guest_phys_addr) + return -EOVERFLOW; + + ghvm->dtb_config = dtb_config; + + r = 0; + break; + } + case GH_VM_START: { + r = gh_vm_ensure_started(ghvm); + break; + } default: r = -ENOTTY; break; diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h index c9f6fa5478ed..26bcc2ae4478 100644 --- a/drivers/virt/gunyah/vm_mgr.h +++ b/drivers/virt/gunyah/vm_mgr.h @@ -10,6 +10,8 @@ #include <linux/list.h> #include <linux/miscdevice.h> #include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/wait.h> #include <uapi/linux/gunyah.h> @@ -34,6 +36,13 @@ struct gh_vm { u16 vmid; struct gh_rm *rm; struct device *parent; + enum gh_rm_vm_auth_mechanism auth; + struct gh_vm_dtb_config dtb_config; + + struct notifier_block nb; + enum gh_rm_vm_status vm_status; + wait_queue_head_t vm_status_wait; + struct rw_semaphore status_lock; struct work_struct free_work; struct mutex mm_lock; @@ -44,5 +53,6 @@ int gh_vm_mem_alloc(struct gh_vm *ghvm, struct gh_userspace_memory_region *regio void gh_vm_mem_reclaim(struct gh_vm *ghvm, struct gh_vm_mem *mapping); int gh_vm_mem_free(struct gh_vm *ghvm, u32 label); struct gh_vm_mem *gh_vm_mem_find_by_label(struct gh_vm *ghvm, u32 label); +struct gh_vm_mem *gh_vm_mem_find_by_addr(struct gh_vm *ghvm, u64 guest_phys_addr, u32 size); #endif diff --git a/drivers/virt/gunyah/vm_mgr_mm.c b/drivers/virt/gunyah/vm_mgr_mm.c index db6f55cef37f..6e1d2e8bddb7 100644 --- a/drivers/virt/gunyah/vm_mgr_mm.c +++ b/drivers/virt/gunyah/vm_mgr_mm.c @@ -47,6 +47,29 @@ void gh_vm_mem_reclaim(struct gh_vm *ghvm, struct gh_vm_mem *mapping) list_del(&mapping->list); } +struct gh_vm_mem *gh_vm_mem_find_by_addr(struct gh_vm *ghvm, u64 guest_phys_addr, u32 size) +{ + struct gh_vm_mem *mapping = NULL; + int ret; + + ret = mutex_lock_interruptible(&ghvm->mm_lock); + if (ret) + return ERR_PTR(ret); + + list_for_each_entry(mapping, &ghvm->memory_mappings, list) { + if (guest_phys_addr >= mapping->guest_phys_addr && + (guest_phys_addr + size <= mapping->guest_phys_addr + + (mapping->npages << PAGE_SHIFT))) { + goto unlock; + } + } + + mapping = NULL; +unlock: + mutex_unlock(&ghvm->mm_lock); + return mapping; +} + struct gh_vm_mem *gh_vm_mem_find_by_label(struct gh_vm *ghvm, u32 label) { struct gh_vm_mem *mapping; diff --git a/include/linux/gunyah_rsc_mgr.h b/include/linux/gunyah_rsc_mgr.h index 88a429dad09e..8b0b46f28e39 100644 --- a/include/linux/gunyah_rsc_mgr.h +++ b/include/linux/gunyah_rsc_mgr.h @@ -29,6 +29,12 @@ struct gh_rm_vm_exited_payload { #define GH_RM_NOTIFICATION_VM_EXITED 0x56100001 enum gh_rm_vm_status { + /** + * RM doesn't have a state where load partially failed because + * only Linux + */ + GH_RM_VM_STATUS_LOAD_FAILED = -1, + GH_RM_VM_STATUS_NO_STATE = 0, GH_RM_VM_STATUS_INIT = 1, GH_RM_VM_STATUS_READY = 2, diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h index a19207e3e065..d6abd8605a2e 100644 --- a/include/uapi/linux/gunyah.h +++ b/include/uapi/linux/gunyah.h @@ -49,4 +49,17 @@ struct gh_userspace_memory_region { #define GH_VM_SET_USER_MEM_REGION _IOW(GH_IOCTL_TYPE, 0x1, \ struct gh_userspace_memory_region) +/** + * struct gh_vm_dtb_config - Set the location of the VM's devicetree blob + * @guest_phys_addr: Address of the VM's devicetree in guest memory. + * @size: Maximum size of the devicetree. + */ +struct gh_vm_dtb_config { + __u64 guest_phys_addr; + __u64 size; +}; +#define GH_VM_SET_DTB_CONFIG _IOW(GH_IOCTL_TYPE, 0x2, struct gh_vm_dtb_config) + +#define GH_VM_START _IO(GH_IOCTL_TYPE, 0x3) + #endif