[RFC,3/5] VFIO: Base framework for new VFIO driver

Message ID	20110901195043.2391.31843.stgit@s20.home (mailing list archive)
State	New, archived
Headers	show Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter2.kernel.org (8.14.4/8.14.4) with ESMTP id p81Jprsa028417 for <patchwork-kvm@patchwork.kernel.org>; Thu, 1 Sep 2011 19:51:53 GMT From: Alex Williamson <alex.williamson@redhat.com> Subject: [RFC PATCH 3/5] VFIO: Base framework for new VFIO driver To: chrisw@sous-sol.org, aik@au1.ibm.com, pmac@au1.ibm.com, dwg@au1.ibm.com, joerg.roedel@amd.com, agraf@suse.de, benve@cisco.com, aafabbri@cisco.com, B08248@freescale.com, B07421@freescale.com, avi@redhat.com, kvm@vger.kernel.org, qemu-devel@nongnu.org, iommu@lists.linux-foundation.org, linux-pci@vger.kernel.org Cc: alex.williamson@redhat.com Date: Thu, 01 Sep 2011 13:50:43 -0600 Message-ID: <20110901195043.2391.31843.stgit@s20.home> In-Reply-To: <20110901194915.2391.97400.stgit@s20.home> References: <20110901194915.2391.97400.stgit@s20.home> User-Agent: StGIT/0.14.3 MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: kvm-owner@vger.kernel.org Precedence: bulk

> +static long vfio_iommu_unl_ioctl(struct file *filep, > + unsigned int cmd, unsigned long arg) > +{ > + struct vfio_iommu *viommu = filep->private_data; > + struct vfio_dma_map dm; > + int ret = -ENOSYS; > + > + switch (cmd) { > + case VFIO_IOMMU_MAP_DMA: > + if (copy_from_user(&dm, (void __user *)arg, sizeof dm)) > + return -EFAULT; > + ret = 0; // XXX - Do something <chuckles> > + if (!ret && copy_to_user((void __user *)arg, &dm, sizeof dm)) > + ret = -EFAULT; > + break; > + > + case VFIO_IOMMU_UNMAP_DMA: > + if (copy_from_user(&dm, (void __user *)arg, sizeof dm)) > + return -EFAULT; > + ret = 0; // XXX - Do something > + if (!ret && copy_to_user((void __user *)arg, &dm, sizeof dm)) > + ret = -EFAULT; > + break; > + } > + return ret; > +} > + > +#ifdef CONFIG_COMPAT > +static long vfio_iommu_compat_ioctl(struct file *filep, > + unsigned int cmd, unsigned long arg) > +{ > + arg = (unsigned long)compat_ptr(arg); > + return vfio_iommu_unl_ioctl(filep, cmd, arg); > +} > +#endif /* CONFIG_COMPAT */ > + > +const struct file_operations vfio_iommu_fops = { > + .owner = THIS_MODULE, > + .release = vfio_iommu_release, > + .unlocked_ioctl = vfio_iommu_unl_ioctl, > +#ifdef CONFIG_COMPAT > + .compat_ioctl = vfio_iommu_compat_ioctl, > +#endif > +}; > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c .. snip.. > +int vfio_group_add_dev(struct device *dev, void *data) > +{ > + struct vfio_device_ops *ops = data; > + struct list_head *pos; > + struct vfio_group *vgroup = NULL; > + struct vfio_device *vdev = NULL; > + unsigned int group; > + int ret = 0, new_group = 0; 'new_group' should probably be 'bool'. > + > + if (iommu_device_group(dev, &group)) > + return 0; -EEXIST? > + > + mutex_lock(&vfio.group_lock); > + > + list_for_each(pos, &vfio.group_list) { > + vgroup = list_entry(pos, struct vfio_group, next); > + if (vgroup->group == group) > + break; > + vgroup = NULL; > + } > + > + if (!vgroup) { > + int id; > + > + if (unlikely(idr_pre_get(&vfio.idr, GFP_KERNEL) == 0)) { > + ret = -ENOMEM; > + goto out; > + } > + vgroup = kzalloc(sizeof(*vgroup), GFP_KERNEL); > + if (!vgroup) { > + ret = -ENOMEM; > + goto out; > + } > + > + vgroup->group = group; > + INIT_LIST_HEAD(&vgroup->device_list); > + > + ret = idr_get_new(&vfio.idr, vgroup, &id); > + if (ret == 0 && id > MINORMASK) { > + idr_remove(&vfio.idr, id); > + kfree(vgroup); > + ret = -ENOSPC; > + goto out; > + } > + > + vgroup->devt = MKDEV(MAJOR(vfio.devt), id); > + list_add(&vgroup->next, &vfio.group_list); > + device_create(vfio.class, NULL, vgroup->devt, > + vgroup, "%u", group); > + > + new_group = 1; > + } else { > + list_for_each(pos, &vgroup->device_list) { > + vdev = list_entry(pos, struct vfio_device, next); > + if (vdev->dev == dev) > + break; > + vdev = NULL; > + } > + } > + > + if (!vdev) { > + /* Adding a device for a group that's already in use? */ > + /* Maybe we should attach to the domain so others can't */ > + BUG_ON(vgroup->container && > + vgroup->container->iommu && > + vgroup->container->iommu->refcnt); > + > + vdev = ops->new(dev); > + if (IS_ERR(vdev)) { > + /* If we just created this vgroup, tear it down */ > + if (new_group) { > + device_destroy(vfio.class, vgroup->devt); > + idr_remove(&vfio.idr, MINOR(vgroup->devt)); > + list_del(&vgroup->next); > + kfree(vgroup); > + } > + ret = PTR_ERR(vdev); > + goto out; > + } > + list_add(&vdev->next, &vgroup->device_list); > + vdev->dev = dev; > + vdev->ops = ops; > + vdev->vfio = &vfio; > + } > +out: > + mutex_unlock(&vfio.group_lock); > + return ret; > +} > + > +void vfio_group_del_dev(struct device *dev) > +{ > + struct list_head *pos; > + struct vfio_container *vcontainer; > + struct vfio_group *vgroup = NULL; > + struct vfio_device *vdev = NULL; > + unsigned int group; > + > + if (iommu_device_group(dev, &group)) > + return; > + > + mutex_lock(&vfio.group_lock); > + > + list_for_each(pos, &vfio.group_list) { > + vgroup = list_entry(pos, struct vfio_group, next); > + if (vgroup->group == group) > + break; > + vgroup = NULL; > + } > + > + if (!vgroup) > + goto out; > + > + vcontainer = vgroup->container; > + > + list_for_each(pos, &vgroup->device_list) { > + vdev = list_entry(pos, struct vfio_device, next); > + if (vdev->dev == dev) > + break; > + vdev = NULL; > + } > + > + if (!vdev) > + goto out; > + > + /* XXX Did a device we're using go away? */ > + BUG_ON(vdev->refcnt); > + > + if (vcontainer && vcontainer->iommu) { > + iommu_detach_device(vcontainer->iommu->domain, vdev->dev); > + vfio_container_reset_read(vcontainer); > + } > + > + list_del(&vdev->next); > + vdev->ops->free(vdev); > + > + if (list_empty(&vgroup->device_list) && vgroup->refcnt == 0) { > + device_destroy(vfio.class, vgroup->devt); > + idr_remove(&vfio.idr, MINOR(vgroup->devt)); > + list_del(&vgroup->next); > + kfree(vgroup); > + } > +out: > + mutex_unlock(&vfio.group_lock); > +} > + > +static int __vfio_group_viable(struct vfio_container *vcontainer) Just return 'bool' > +{ > + struct list_head *gpos, *dpos; > + > + list_for_each(gpos, &vfio.group_list) { > + struct vfio_group *vgroup; > + vgroup = list_entry(gpos, struct vfio_group, next); > + if (vgroup->container != vcontainer) > + continue; > + > + list_for_each(dpos, &vgroup->device_list) { > + struct vfio_device *vdev; > + vdev = list_entry(dpos, struct vfio_device, next); > + > + if (!vdev->dev->driver || > + vdev->dev->driver->owner != THIS_MODULE) > + return 0; > + } > + } > + return 1; > +} > + > +static int __vfio_close_iommu(struct vfio_container *vcontainer) > +{ > + struct list_head *gpos, *dpos; > + struct vfio_iommu *viommu = vcontainer->iommu; > + struct vfio_group *vgroup; > + struct vfio_device *vdev; > + > + if (!viommu) > + return 0; > + > + if (viommu->refcnt) > + return -EBUSY; > + > + list_for_each(gpos, &vfio.group_list) { > + vgroup = list_entry(gpos, struct vfio_group, next); > + if (vgroup->container != vcontainer) > + continue; > + > + list_for_each(dpos, &vgroup->device_list) { > + vdev = list_entry(dpos, struct vfio_device, next); > + iommu_detach_device(viommu->domain, vdev->dev); > + vdev->iommu = NULL; > + } > + } > + iommu_domain_free(viommu->domain); > + kfree(viommu); > + vcontainer->iommu = NULL; > + return 0; > +} > + > +static int __vfio_open_iommu(struct vfio_container *vcontainer) > +{ > + struct list_head *gpos, *dpos; > + struct vfio_iommu *viommu; > + struct vfio_group *vgroup; > + struct vfio_device *vdev; > + > + if (!__vfio_group_viable(vcontainer)) > + return -EBUSY; > + > + viommu = kzalloc(sizeof(*viommu), GFP_KERNEL); > + if (!viommu) > + return -ENOMEM; > + > + viommu->domain = iommu_domain_alloc(); > + if (!viommu->domain) { > + kfree(viommu); > + return -EFAULT; > + } > + > + viommu->vfio = &vfio; > + vcontainer->iommu = viommu; > + No need for mutex_lock(&vfio.group_lock); Ah, you already hold the lock when using this function. > + list_for_each(gpos, &vfio.group_list) { > + vgroup = list_entry(gpos, struct vfio_group, next); > + if (vgroup->container != vcontainer) > + continue; > + > + list_for_each(dpos, &vgroup->device_list) { > + int ret; > + > + vdev = list_entry(dpos, struct vfio_device, next); > + > + ret = iommu_attach_device(viommu->domain, vdev->dev); > + if (ret) { > + __vfio_close_iommu(vcontainer); > + return ret; > + } > + vdev->iommu = viommu; > + } > + } > + > + if (!allow_unsafe_intrs && > + !iommu_domain_has_cap(viommu->domain, IOMMU_CAP_INTR_REMAP)) { > + __vfio_close_iommu(vcontainer); > + return -EFAULT; > + } > + > + return 0; > +} > + > +static int vfio_group_merge(struct vfio_group *vgroup, int fd) > +{ > + struct vfio_group *vgroup2; > + struct iommu_domain *domain; > + struct list_head *pos; > + struct file *file; > + int ret = 0; > + > + mutex_lock(&vfio.group_lock); > + > + file = fget(fd); > + if (!file) { > + ret = -EBADF; > + goto out_noput; > + } > + if (file->f_op != &vfio_group_fops) { > + ret = -EINVAL; > + goto out; > + } > + > + vgroup2 = file->private_data; > + if (!vgroup2 || vgroup2 == vgroup || vgroup2->mm != vgroup->mm || > + (vgroup2->container->iommu && vgroup2->container->iommu->refcnt)) { > + ret = -EINVAL; > + goto out; > + } > + > + if (!vgroup->container->iommu) { > + ret = __vfio_open_iommu(vgroup->container); > + if (ret) > + goto out; > + } > + > + if (!vgroup2->container->iommu) { > + ret = __vfio_open_iommu(vgroup2->container); > + if (ret) > + goto out; > + } > + > + if (iommu_domain_has_cap(vgroup->container->iommu->domain, > + IOMMU_CAP_CACHE_COHERENCY) != > + iommu_domain_has_cap(vgroup2->container->iommu->domain, > + IOMMU_CAP_CACHE_COHERENCY)) { > + ret = -EINVAL; > + goto out; > + } > + > + ret = __vfio_close_iommu(vgroup2->container); > + if (ret) > + goto out; > + > + domain = vgroup->container->iommu->domain; > + > + list_for_each(pos, &vgroup2->device_list) { > + struct vfio_device *vdev; > + > + vdev = list_entry(pos, struct vfio_device, next); > + > + ret = iommu_attach_device(domain, vdev->dev); > + if (ret) { > + list_for_each(pos, &vgroup2->device_list) { > + struct vfio_device *vdev2; > + > + vdev2 = list_entry(pos, > + struct vfio_device, next); > + if (vdev2 == vdev) > + break; > + > + iommu_detach_device(domain, vdev2->dev); > + vdev2->iommu = NULL; > + } > + goto out; > + } > + vdev->iommu = vgroup->container->iommu; > + } > + > + kfree(vgroup2->container->read_buf); > + kfree(vgroup2->container); > + > + vgroup2->container = vgroup->container; > + vgroup->container->refcnt++; > + vfio_container_reset_read(vgroup->container); > + > +out: > + fput(file); > +out_noput: > + mutex_unlock(&vfio.group_lock); > + return ret; > +} > + > +static int vfio_group_unmerge(struct vfio_group *vgroup, int fd) > +{ > + struct vfio_group *vgroup2; > + struct vfio_container *vcontainer2; > + struct vfio_device *vdev; > + struct list_head *pos; > + struct file *file; > + int ret = 0; > + > + vcontainer2 = kzalloc(sizeof(*vcontainer2), GFP_KERNEL); > + if (!vcontainer2) > + return -ENOMEM; > + > + mutex_lock(&vfio.group_lock); > + > + file = fget(fd); > + if (!file) { > + ret = -EBADF; > + goto out_noput; > + } > + if (file->f_op != &vfio_group_fops) { Hm, I think scripts/checkpath.pl will not like that, but as you said - it is RFC. > + ret = -EINVAL; > + goto out; > + } > + > + vgroup2 = file->private_data; > + if (!vgroup2 || vgroup2 == vgroup || > + vgroup2->container != vgroup->container) { > + ret = -EINVAL; > + goto out; > + } > + > + list_for_each(pos, &vgroup2->device_list) { > + vdev = list_entry(pos, struct vfio_device, next); > + if (vdev->refcnt) { > + ret = -EBUSY; > + goto out; > + } > + } > + > + list_for_each(pos, &vgroup2->device_list) { > + vdev = list_entry(pos, struct vfio_device, next); > + iommu_detach_device(vgroup->container->iommu->domain, > + vdev->dev); > + vdev->iommu = NULL; > + } > + > + vgroup2->container = vcontainer2; > + vcontainer2->refcnt++; > + vgroup->container->refcnt--; > + vfio_container_reset_read(vgroup->container); > +out: > + fput(file); > +out_noput: > + if (ret) > + kfree(vcontainer2); > + mutex_unlock(&vfio.group_lock); > + return ret; > +} > + > +static int vfio_group_get_iommu_fd(struct vfio_group *vgroup) > +{ > + int ret = 0; > + struct vfio_iommu *viommu; > + > + mutex_lock(&vfio.group_lock); > + > + if (!vgroup->container->iommu) { > + ret = __vfio_open_iommu(vgroup->container); > + if (ret) > + goto out; > + } > + > + viommu = vgroup->container->iommu; > + > + if (!viommu->file) { > + viommu->file = anon_inode_getfile("vfio-iommu", > + &vfio_iommu_fops, > + viommu, O_RDWR); > + if (IS_ERR(viommu->file)) { > + ret = PTR_ERR(viommu->file); > + viommu->file = NULL; > + goto out; > + } > + } > + ret = get_unused_fd(); > + if (ret < 0) > + goto out; > + > + fd_install(ret, viommu->file); > + > + vgroup->container->iommu->refcnt++; > +out: > + mutex_unlock(&vfio.group_lock); > + return ret; > +} > + > +static int vfio_group_get_device_fd(struct vfio_group *vgroup, char *buf) > +{ > + struct vfio_container *vcontainer = vgroup->container; > + struct list_head *gpos, *dpos; > + int ret = -ENODEV; > + > + mutex_lock(&vfio.group_lock); > + > + if (!vcontainer->iommu) { > + ret = __vfio_open_iommu(vcontainer); > + if (ret) > + goto out; > + } > + > + list_for_each(gpos, &vfio.group_list) { > + vgroup = list_entry(gpos, struct vfio_group, next); > + if (vgroup->container != vcontainer) > + continue; > + > + list_for_each(dpos, &vgroup->device_list) { > + struct vfio_device *vdev; > + char buf2[MAX_PATH]; > + > + vdev = list_entry(dpos, struct vfio_device, next); > + > + snprintf(buf2, MAX_PATH, "%s", dev_name(vdev->dev)); > + > + if (!strncmp(buf, buf2, MAX_PATH)) { > + if (!vdev->file) { > + vdev->file = anon_inode_getfile( > + "vfio-device", > + &vfio_device_fops, > + vdev, O_RDWR); > + if (IS_ERR(vdev->file)) { > + ret = PTR_ERR(vdev->file); > + vdev->file = NULL; > + goto out; > + } > + } > + ret = get_unused_fd(); > + if (ret < 0) > + goto out; > + > + fd_install(ret, vdev->file); > + > + vdev->refcnt++; > + vcontainer->iommu->refcnt++; > + goto out; > + } > + } > + } > +out: > + mutex_unlock(&vfio.group_lock); > + return ret; > +} > + > +static long vfio_group_unl_ioctl(struct file *filep, > + unsigned int cmd, unsigned long arg) > +{ > + struct vfio_group *vgroup = filep->private_data; > + > + if (vgroup->mm != current->mm) > + return -EIO; > + > + switch (cmd) { > + case VFIO_GROUP_MERGE: > + case VFIO_GROUP_UNMERGE: > + { > + int fd; > + > + if (get_user(fd, (int __user *)arg)) > + return -EFAULT; > + if (fd < 0) > + return -EINVAL; > + > + if (cmd == VFIO_GROUP_MERGE) > + return vfio_group_merge(vgroup, fd); > + else > + return vfio_group_unmerge(vgroup, fd); > + } > + case VFIO_GROUP_GET_IOMMU_FD: > + return vfio_group_get_iommu_fd(vgroup); > + case VFIO_GROUP_GET_DEVICE_FD: > + { > + char *buf; > + int ret; > + > + buf = strndup_user((const char __user *)arg, MAX_PATH); > + if (IS_ERR(buf)) > + return PTR_ERR(buf); > + > + ret = vfio_group_get_device_fd(vgroup, buf); > + kfree(buf); > + return ret; > + } > + } > + return -ENOSYS; > +} > + > + > +#ifdef CONFIG_COMPAT > +static long vfio_group_compat_ioctl(struct file *filep, > + unsigned int cmd, unsigned long arg) > +{ > + arg = (unsigned long)compat_ptr(arg); > + return vfio_group_unl_ioctl(filep, cmd, arg); > +} > +#endif /* CONFIG_COMPAT */ > + > +static int vfio_group_open(struct inode *inode, struct file *filep) > +{ > + struct vfio_group *vgroup; > + int ret = 0; > + > + mutex_lock(&vfio.group_lock); > + > + vgroup = idr_find(&vfio.idr, iminor(inode)); > + > + if (!vgroup) { > + ret = -ENODEV; > + goto out; > + } > + > + if (!vgroup->refcnt) { > + struct vfio_container *vcontainer; > + vcontainer = kzalloc(sizeof(*vcontainer), GFP_KERNEL); > + if (!vcontainer) { > + ret = -ENOMEM; > + goto out; > + } > + vgroup->container = vcontainer; > + vgroup->mm = current->mm; > + } else if (current->mm != vgroup->mm) { > + ret = -EBUSY; > + goto out; > + } > + filep->private_data = vgroup; > + vgroup->refcnt++; > + vgroup->container->refcnt++; > +out: > + mutex_unlock(&vfio.group_lock); > + > + return ret; > +} > + > +static int vfio_group_release(struct inode *inode, struct file *filep) > +{ > + struct vfio_group *vgroup = filep->private_data; > + struct vfio_container *vcontainer = vgroup->container; > + struct list_head *pos; > + int ret = 0; > + > + mutex_lock(&vfio.group_lock); > + > + if (vgroup->refcnt > 1) { > + vgroup->refcnt--; > + vcontainer->refcnt--; > + goto out; > + } > + > + list_for_each(pos, &vgroup->device_list) { > + struct vfio_device *vdev; > + vdev = list_entry(pos, struct vfio_device, next); > + if (vdev->refcnt) { > + ret = -EBUSY; > + goto out; > + } > + } > + > + /* Merged group? */ > + if (vcontainer->refcnt > 1) { > + if (vcontainer->iommu) { > + list_for_each(pos, &vgroup->device_list) { > + struct vfio_device *vdev; > + vdev = list_entry(pos, > + struct vfio_device, next); > + iommu_detach_device(vcontainer->iommu->domain, > + vdev->dev); > + vdev->iommu = NULL; > + } > + } > + vcontainer->refcnt--; > + vfio_container_reset_read(vcontainer); > + } else { > + if (vcontainer->iommu && vcontainer->iommu->refcnt) { > + ret = -EBUSY; > + goto out; > + } > + > + ret = __vfio_close_iommu(vcontainer); > + if (ret) > + goto out; > + > + kfree(vcontainer->read_buf); > + kfree(vcontainer); > + } > + > + vgroup->refcnt--; > + vgroup->mm = NULL; > + vgroup->container = NULL; > + > + /* Possible we had the group open while device members were removed */ > + if (list_empty(&vgroup->device_list)) { > + device_destroy(vfio.class, vgroup->devt); > + idr_remove(&vfio.idr, MINOR(vgroup->devt)); > + list_del(&vgroup->next); > + kfree(vgroup); > + } > +out: > + mutex_unlock(&vfio.group_lock); > + return 0; > +} > + > +static int __vfio_container_create_read_buf(struct vfio_container *vcontainer) > +{ > + struct list_head *gpos, *dpos; > + struct vfio_group *vgroup; > + struct vfio_device *vdev; > + int off = 0; > + char *buf; > + > + buf = kzalloc(MAX_PATH, GFP_KERNEL); > + if (!buf) > + return -ENOMEM; > + > + list_for_each(gpos, &vfio.group_list) { > + vgroup = list_entry(gpos, struct vfio_group, next); > + if (vgroup->container != vcontainer) > + continue; > + > + off += snprintf(buf + off, MAX_PATH, > + "group: %u\n", vgroup->group); > + buf = krealloc(buf, off + MAX_PATH, GFP_KERNEL); > + if (!buf) > + return -ENOMEM; > + memset(buf + off, 0, MAX_PATH); > + > + list_for_each(dpos, &vgroup->device_list) { > + vdev = list_entry(dpos, struct vfio_device, next); > + > + off += snprintf(buf + off, MAX_PATH, > + "device: %s\n", dev_name(vdev->dev)); > + buf = krealloc(buf, off + MAX_PATH, GFP_KERNEL); > + if (!buf) > + return -ENOMEM; > + memset(buf + off, 0, MAX_PATH); > + } > + } > + buf = krealloc(buf, off + 1, GFP_KERNEL); > + if (!buf) > + return -ENOMEM; > + > + vcontainer->read_buf = buf; > + return 0; > +} > + > +static ssize_t vfio_group_read(struct file *filep, char __user *buf, > + size_t count, loff_t *ppos) > +{ > + struct vfio_group *vgroup = filep->private_data; > + struct vfio_container *vcontainer; > + ssize_t ret = 0; > + > + mutex_lock(&vfio.group_lock); > + > + vcontainer = vgroup->container; > + > + if (!vcontainer) { > + ret = -EINVAL; > + goto out; > + } > + > + if (!vcontainer->read_buf) { > + ret = __vfio_container_create_read_buf(vcontainer); > + if (ret) > + goto out; > + } > + > + if (*ppos >= strlen(vcontainer->read_buf) + 1) { > + ret = 0; > + goto out; > + } > + > + if (*ppos + count > strlen(vcontainer->read_buf) + 1) > + count = strlen(vcontainer->read_buf) + 1 - *ppos; > + > + if (copy_to_user(buf, vcontainer->read_buf + *ppos, count)) { > + ret = -EFAULT; > + goto out; > + } > + > + *ppos += count; > + ret = count; > +out: > + mutex_unlock(&vfio.group_lock); > + return ret; > +} > + > +static const struct file_operations vfio_group_fops = { > + .owner = THIS_MODULE, > + .open = vfio_group_open, > + .release = vfio_group_release, > + .read = vfio_group_read, > + .unlocked_ioctl = vfio_group_unl_ioctl, > +#ifdef CONFIG_COMPAT > + .compat_ioctl = vfio_group_compat_ioctl, > +#endif > +}; > + > +static void vfio_class_release(struct kref *kref) > +{ > + class_destroy(vfio.class); > + vfio.class = NULL; > +} > + > +static char *vfio_devnode(struct device *dev, mode_t *mode) > +{ > + return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); > +} > + > +static int __init vfio_init(void) > +{ > + int ret; > + > + idr_init(&vfio.idr); > + mutex_init(&vfio.group_lock); > + INIT_LIST_HEAD(&vfio.group_list); > + > + kref_init(&vfio.kref); > + vfio.class = class_create(THIS_MODULE, "vfio"); > + if (IS_ERR(vfio.class)) { > + ret = PTR_ERR(vfio.class); > + goto err_class; > + } > + > + vfio.class->devnode = vfio_devnode; > + > + /* FIXME - how many minors to allocate... all of them! */ > + ret = alloc_chrdev_region(&vfio.devt, 0, MINORMASK, "vfio"); > + if (ret) > + goto err_chrdev; > + > + cdev_init(&vfio.cdev, &vfio_group_fops); > + ret = cdev_add(&vfio.cdev, vfio.devt, MINORMASK); > + if (ret) > + goto err_cdev; > + > + pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); > + > + return 0; > + > +err_cdev: > + unregister_chrdev_region(vfio.devt, MINORMASK); > +err_chrdev: > + kref_put(&vfio.kref, vfio_class_release); > +err_class: > + return ret; > +} > + > +static void __exit vfio_cleanup(void) > +{ > + struct list_head *gpos, *gppos; > + > + list_for_each_safe(gpos, gppos, &vfio.group_list) { > + struct vfio_group *vgroup; > + struct list_head *dpos, *dppos; > + > + vgroup = list_entry(gpos, struct vfio_group, next); > + > + list_for_each_safe(dpos, dppos, &vgroup->device_list) { > + struct vfio_device *vdev; > + > + vdev = list_entry(dpos, struct vfio_device, next); > + vfio_group_del_dev(vdev->dev); > + } > + } > + > + idr_destroy(&vfio.idr); > + cdev_del(&vfio.cdev); > + unregister_chrdev_region(vfio.devt, MINORMASK); > + kref_put(&vfio.kref, vfio_class_release); > +} > + > +module_init(vfio_init); > +module_exit(vfio_cleanup); > + > +MODULE_VERSION(DRIVER_VERSION); > +MODULE_LICENSE("GPL v2"); > +MODULE_AUTHOR(DRIVER_AUTHOR); > +MODULE_DESCRIPTION(DRIVER_DESC); > diff --git a/drivers/vfio/vfio_private.h b/drivers/vfio/vfio_private.h > new file mode 100644 > index 0000000..2cc300c > --- /dev/null > +++ b/drivers/vfio/vfio_private.h > @@ -0,0 +1,82 @@ > +/* > + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. > + * Author: Alex Williamson <alex.williamson@redhat.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * Derived from original vfio: > + * Copyright 2010 Cisco Systems, Inc. All rights reserved. > + * Author: Tom Lyon, pugs@cisco.com > + */ > + > +#include <linux/cdev.h> > +#include <linux/device.h> > +#include <linux/file.h> > +#include <linux/fs.h> > +#include <linux/idr.h> > +#include <linux/iommu.h> > +#include <linux/list.h> > +#include <linux/mm.h> > +#include <linux/mutex.h> > + > +#ifndef VFIO_PRIVATE_H > +#define VFIO_PRIVATE_H > + > +extern const struct file_operations vfio_iommu_fops; > +extern const struct file_operations vfio_device_fops; > + > +struct vfio { > + dev_t devt; > + struct cdev cdev; > + struct list_head group_list; > + struct mutex group_lock; > + struct kref kref; > + struct class *class; > + struct idr idr; > +}; > + > +struct vfio_device_ops { > + struct vfio_device *(* new)(struct device *); > + void (* free)(struct vfio_device *); > + struct file_operations fops; > +}; > + > +struct vfio_iommu { > + struct iommu_domain *domain; > + struct vfio *vfio; > + int refcnt; > + struct file *file; > +}; > + > +struct vfio_device { > + struct device *dev; > + struct list_head next; > + struct file *file; > + struct vfio_device_ops *ops; > + struct vfio *vfio; > + struct vfio_iommu *iommu; > + int refcnt; > +}; > + > +struct vfio_container { > + struct vfio_iommu *iommu; > + char *read_buf; > + int refcnt; > +}; > + > +struct vfio_group { > + dev_t devt; > + unsigned int group; > + int refcnt; > + struct mm_struct *mm; > + struct vfio_container *container; > + struct list_head device_list; > + struct list_head next; > +}; > + > +extern int vfio_group_add_dev(struct device *dev, void *data); > +extern void vfio_group_del_dev(struct device *dev); > + > +#endif /* VFIO_PRIVATE_H */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-pci" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html

diff --git a/drivers/Kconfig b/drivers/Kconfig index 3bb154d..5b5fffc 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -112,6 +112,8 @@ source "drivers/auxdisplay/Kconfig" source "drivers/uio/Kconfig" +source "drivers/vfio/Kconfig" + source "drivers/vlynq/Kconfig" source "drivers/xen/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 09f3232..6b17848 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_FUSION) += message/ obj-y += firewire/ obj-$(CONFIG_UIO) += uio/ +obj-$(CONFIG_VFIO) += vfio/ obj-y += cdrom/ obj-y += auxdisplay/ obj-$(CONFIG_PCCARD) += pcmcia/ diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig new file mode 100644 index 0000000..a150521 --- /dev/null +++ b/drivers/vfio/Kconfig @@ -0,0 +1,5 @@ +menuconfig VFIO + tristate "Non-Privileged User Space driver" + depends on IOMMU_API + help + If you don't know what to do here, say N. diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile new file mode 100644 index 0000000..5eaa074 --- /dev/null +++ b/drivers/vfio/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_VFIO) := vfio.o + +vfio-y := vfio_main.o vfio_iommu.o vfio_device.o diff --git a/drivers/vfio/vfio_device.c b/drivers/vfio/vfio_device.c new file mode 100644 index 0000000..101cbbf --- /dev/null +++ b/drivers/vfio/vfio_device.c @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, pugs@cisco.com + */ + +/* + * VFIO device module: Common device handling and callouts to other drivers + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/interrupt.h> +#include <linux/fs.h> +#include <linux/eventfd.h> +#include <linux/uaccess.h> +#include <linux/compat.h> +#include <linux/vfio.h> + +#include "vfio_private.h" + +static int vfio_device_release(struct inode *inode, struct file *filep) +{ + struct vfio_device *vdev = filep->private_data; + + mutex_lock(&vdev->vfio->group_lock); + vdev->refcnt--; + vdev->iommu->refcnt--; + mutex_unlock(&vdev->vfio->group_lock); + + return 0; +} + +static long vfio_device_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct vfio_device *vdev = filep->private_data; + int ret = -EINVAL; + + switch (cmd) { + // TBD - what can we handle as common device ioctls? + default: + if (vdev->ops->fops.unlocked_ioctl) + ret = vdev->ops->fops.unlocked_ioctl(filep, cmd, arg); + } + return ret; +} + +static ssize_t vfio_device_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + struct vfio_device *vdev = filep->private_data; + + if (vdev->ops->fops.read) + return vdev->ops->fops.read(filep, buf, count, ppos); + + return -EINVAL; +} + +static ssize_t vfio_device_write(struct file *filep, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct vfio_device *vdev = filep->private_data; + + if (vdev->ops->fops.write) + return vdev->ops->fops.write(filep, buf, count, ppos); + + return -EINVAL; +} + +static int vfio_device_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct vfio_device *vdev = filep->private_data; + + if (vdev->ops->fops.mmap) + return vdev->ops->fops.mmap(filep, vma); + + return -EINVAL; +} + +#ifdef CONFIG_COMPAT +static long vfio_device_compat_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + arg = (unsigned long)compat_ptr(arg); + return vfio_device_unl_ioctl(filep, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + +const struct file_operations vfio_device_fops = { + .owner = THIS_MODULE, + .release = vfio_device_release, + .read = vfio_device_read, + .write = vfio_device_write, + .unlocked_ioctl = vfio_device_unl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vfio_device_compat_ioctl, +#endif + .mmap = vfio_device_mmap, +}; diff --git a/drivers/vfio/vfio_iommu.c b/drivers/vfio/vfio_iommu.c new file mode 100644 index 0000000..1a6f321 --- /dev/null +++ b/drivers/vfio/vfio_iommu.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, pugs@cisco.com + */ + +/* + * VFIO iomm module: iommu fd callbacks + */ + +#include <linux/compat.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/vfio.h> + +#include "vfio_private.h" + +static int vfio_iommu_release(struct inode *inode, struct file *filep) +{ + struct vfio_iommu *viommu = filep->private_data; + + mutex_lock(&viommu->vfio->group_lock); + viommu->refcnt--; + mutex_unlock(&viommu->vfio->group_lock); + return 0; +} + +static long vfio_iommu_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct vfio_iommu *viommu = filep->private_data; + struct vfio_dma_map dm; + int ret = -ENOSYS; + + switch (cmd) { + case VFIO_IOMMU_MAP_DMA: + if (copy_from_user(&dm, (void __user *)arg, sizeof dm)) + return -EFAULT; + ret = 0; // XXX - Do something + if (!ret && copy_to_user((void __user *)arg, &dm, sizeof dm)) + ret = -EFAULT; + break; + + case VFIO_IOMMU_UNMAP_DMA: + if (copy_from_user(&dm, (void __user *)arg, sizeof dm)) + return -EFAULT; + ret = 0; // XXX - Do something + if (!ret && copy_to_user((void __user *)arg, &dm, sizeof dm)) + ret = -EFAULT; + break; + } + return ret; +} + +#ifdef CONFIG_COMPAT +static long vfio_iommu_compat_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + arg = (unsigned long)compat_ptr(arg); + return vfio_iommu_unl_ioctl(filep, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + +const struct file_operations vfio_iommu_fops = { + .owner = THIS_MODULE, + .release = vfio_iommu_release, + .unlocked_ioctl = vfio_iommu_unl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vfio_iommu_compat_ioctl, +#endif +}; diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c new file mode 100644 index 0000000..7f05692 --- /dev/null +++ b/drivers/vfio/vfio_main.c @@ -0,0 +1,879 @@ +/* + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, pugs@cisco.com + */ + +/* + * VFIO main module: IOMMU group framework + */ + +#include <linux/cdev.h> +#include <linux/compat.h> +#include <linux/device.h> +#include <linux/file.h> +#include <linux/anon_inodes.h> +#include <linux/fs.h> +#include <linux/idr.h> +#include <linux/iommu.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/vfio.h> + +#include "vfio_private.h" + +#define DRIVER_VERSION "0.2" +#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" +#define DRIVER_DESC "VFIO - User Level meta-driver" + +#define MAX_PATH 256 + +static int allow_unsafe_intrs; +module_param(allow_unsafe_intrs, int, 0); +MODULE_PARM_DESC(allow_unsafe_intrs, + "Allow use of IOMMUs which do not support interrupt remapping"); + +static struct vfio vfio; +static const struct file_operations vfio_group_fops; + +static inline void vfio_container_reset_read(struct vfio_container *vcontainer) +{ + kfree(vcontainer->read_buf); + vcontainer->read_buf = NULL; +} + +int vfio_group_add_dev(struct device *dev, void *data) +{ + struct vfio_device_ops *ops = data; + struct list_head *pos; + struct vfio_group *vgroup = NULL; + struct vfio_device *vdev = NULL; + unsigned int group; + int ret = 0, new_group = 0; + + if (iommu_device_group(dev, &group)) + return 0; + + mutex_lock(&vfio.group_lock); + + list_for_each(pos, &vfio.group_list) { + vgroup = list_entry(pos, struct vfio_group, next); + if (vgroup->group == group) + break; + vgroup = NULL; + } + + if (!vgroup) { + int id; + + if (unlikely(idr_pre_get(&vfio.idr, GFP_KERNEL) == 0)) { + ret = -ENOMEM; + goto out; + } + vgroup = kzalloc(sizeof(*vgroup), GFP_KERNEL); + if (!vgroup) { + ret = -ENOMEM; + goto out; + } + + vgroup->group = group; + INIT_LIST_HEAD(&vgroup->device_list); + + ret = idr_get_new(&vfio.idr, vgroup, &id); + if (ret == 0 && id > MINORMASK) { + idr_remove(&vfio.idr, id); + kfree(vgroup); + ret = -ENOSPC; + goto out; + } + + vgroup->devt = MKDEV(MAJOR(vfio.devt), id); + list_add(&vgroup->next, &vfio.group_list); + device_create(vfio.class, NULL, vgroup->devt, + vgroup, "%u", group); + + new_group = 1; + } else { + list_for_each(pos, &vgroup->device_list) { + vdev = list_entry(pos, struct vfio_device, next); + if (vdev->dev == dev) + break; + vdev = NULL; + } + } + + if (!vdev) { + /* Adding a device for a group that's already in use? */ + /* Maybe we should attach to the domain so others can't */ + BUG_ON(vgroup->container && + vgroup->container->iommu && + vgroup->container->iommu->refcnt); + + vdev = ops->new(dev); + if (IS_ERR(vdev)) { + /* If we just created this vgroup, tear it down */ + if (new_group) { + device_destroy(vfio.class, vgroup->devt); + idr_remove(&vfio.idr, MINOR(vgroup->devt)); + list_del(&vgroup->next); + kfree(vgroup); + } + ret = PTR_ERR(vdev); + goto out; + } + list_add(&vdev->next, &vgroup->device_list); + vdev->dev = dev; + vdev->ops = ops; + vdev->vfio = &vfio; + } +out: + mutex_unlock(&vfio.group_lock); + return ret; +} + +void vfio_group_del_dev(struct device *dev) +{ + struct list_head *pos; + struct vfio_container *vcontainer; + struct vfio_group *vgroup = NULL; + struct vfio_device *vdev = NULL; + unsigned int group; + + if (iommu_device_group(dev, &group)) + return; + + mutex_lock(&vfio.group_lock); + + list_for_each(pos, &vfio.group_list) { + vgroup = list_entry(pos, struct vfio_group, next); + if (vgroup->group == group) + break; + vgroup = NULL; + } + + if (!vgroup) + goto out; + + vcontainer = vgroup->container; + + list_for_each(pos, &vgroup->device_list) { + vdev = list_entry(pos, struct vfio_device, next); + if (vdev->dev == dev) + break; + vdev = NULL; + } + + if (!vdev) + goto out; + + /* XXX Did a device we're using go away? */ + BUG_ON(vdev->refcnt); + + if (vcontainer && vcontainer->iommu) { + iommu_detach_device(vcontainer->iommu->domain, vdev->dev); + vfio_container_reset_read(vcontainer); + } + + list_del(&vdev->next); + vdev->ops->free(vdev); + + if (list_empty(&vgroup->device_list) && vgroup->refcnt == 0) { + device_destroy(vfio.class, vgroup->devt); + idr_remove(&vfio.idr, MINOR(vgroup->devt)); + list_del(&vgroup->next); + kfree(vgroup); + } +out: + mutex_unlock(&vfio.group_lock); +} + +static int __vfio_group_viable(struct vfio_container *vcontainer) +{ + struct list_head *gpos, *dpos; + + list_for_each(gpos, &vfio.group_list) { + struct vfio_group *vgroup; + vgroup = list_entry(gpos, struct vfio_group, next); + if (vgroup->container != vcontainer) + continue; + + list_for_each(dpos, &vgroup->device_list) { + struct vfio_device *vdev; + vdev = list_entry(dpos, struct vfio_device, next); + + if (!vdev->dev->driver || + vdev->dev->driver->owner != THIS_MODULE) + return 0; + } + } + return 1; +} + +static int __vfio_close_iommu(struct vfio_container *vcontainer) +{ + struct list_head *gpos, *dpos; + struct vfio_iommu *viommu = vcontainer->iommu; + struct vfio_group *vgroup; + struct vfio_device *vdev; + + if (!viommu) + return 0; + + if (viommu->refcnt) + return -EBUSY; + + list_for_each(gpos, &vfio.group_list) { + vgroup = list_entry(gpos, struct vfio_group, next); + if (vgroup->container != vcontainer) + continue; + + list_for_each(dpos, &vgroup->device_list) { + vdev = list_entry(dpos, struct vfio_device, next); + iommu_detach_device(viommu->domain, vdev->dev); + vdev->iommu = NULL; + } + } + iommu_domain_free(viommu->domain); + kfree(viommu); + vcontainer->iommu = NULL; + return 0; +} + +static int __vfio_open_iommu(struct vfio_container *vcontainer) +{ + struct list_head *gpos, *dpos; + struct vfio_iommu *viommu; + struct vfio_group *vgroup; + struct vfio_device *vdev; + + if (!__vfio_group_viable(vcontainer)) + return -EBUSY; + + viommu = kzalloc(sizeof(*viommu), GFP_KERNEL); + if (!viommu) + return -ENOMEM; + + viommu->domain = iommu_domain_alloc(); + if (!viommu->domain) { + kfree(viommu); + return -EFAULT; + } + + viommu->vfio = &vfio; + vcontainer->iommu = viommu; + + list_for_each(gpos, &vfio.group_list) { + vgroup = list_entry(gpos, struct vfio_group, next); + if (vgroup->container != vcontainer) + continue; + + list_for_each(dpos, &vgroup->device_list) { + int ret; + + vdev = list_entry(dpos, struct vfio_device, next); + + ret = iommu_attach_device(viommu->domain, vdev->dev); + if (ret) { + __vfio_close_iommu(vcontainer); + return ret; + } + vdev->iommu = viommu; + } + } + + if (!allow_unsafe_intrs && + !iommu_domain_has_cap(viommu->domain, IOMMU_CAP_INTR_REMAP)) { + __vfio_close_iommu(vcontainer); + return -EFAULT; + } + + return 0; +} + +static int vfio_group_merge(struct vfio_group *vgroup, int fd) +{ + struct vfio_group *vgroup2; + struct iommu_domain *domain; + struct list_head *pos; + struct file *file; + int ret = 0; + + mutex_lock(&vfio.group_lock); + + file = fget(fd); + if (!file) { + ret = -EBADF; + goto out_noput; + } + if (file->f_op != &vfio_group_fops) { + ret = -EINVAL; + goto out; + } + + vgroup2 = file->private_data; + if (!vgroup2 || vgroup2 == vgroup || vgroup2->mm != vgroup->mm || + (vgroup2->container->iommu && vgroup2->container->iommu->refcnt)) { + ret = -EINVAL; + goto out; + } + + if (!vgroup->container->iommu) { + ret = __vfio_open_iommu(vgroup->container); + if (ret) + goto out; + } + + if (!vgroup2->container->iommu) { + ret = __vfio_open_iommu(vgroup2->container); + if (ret) + goto out; + } + + if (iommu_domain_has_cap(vgroup->container->iommu->domain, + IOMMU_CAP_CACHE_COHERENCY) != + iommu_domain_has_cap(vgroup2->container->iommu->domain, + IOMMU_CAP_CACHE_COHERENCY)) { + ret = -EINVAL; + goto out; + } + + ret = __vfio_close_iommu(vgroup2->container); + if (ret) + goto out; + + domain = vgroup->container->iommu->domain; + + list_for_each(pos, &vgroup2->device_list) { + struct vfio_device *vdev; + + vdev = list_entry(pos, struct vfio_device, next); + + ret = iommu_attach_device(domain, vdev->dev); + if (ret) { + list_for_each(pos, &vgroup2->device_list) { + struct vfio_device *vdev2; + + vdev2 = list_entry(pos, + struct vfio_device, next); + if (vdev2 == vdev) + break; + + iommu_detach_device(domain, vdev2->dev); + vdev2->iommu = NULL; + } + goto out; + } + vdev->iommu = vgroup->container->iommu; + } + + kfree(vgroup2->container->read_buf); + kfree(vgroup2->container); + + vgroup2->container = vgroup->container; + vgroup->container->refcnt++; + vfio_container_reset_read(vgroup->container); + +out: + fput(file); +out_noput: + mutex_unlock(&vfio.group_lock); + return ret; +} + +static int vfio_group_unmerge(struct vfio_group *vgroup, int fd) +{ + struct vfio_group *vgroup2; + struct vfio_container *vcontainer2; + struct vfio_device *vdev; + struct list_head *pos; + struct file *file; + int ret = 0; + + vcontainer2 = kzalloc(sizeof(*vcontainer2), GFP_KERNEL); + if (!vcontainer2) + return -ENOMEM; + + mutex_lock(&vfio.group_lock); + + file = fget(fd); + if (!file) { + ret = -EBADF; + goto out_noput; + } + if (file->f_op != &vfio_group_fops) { + ret = -EINVAL; + goto out; + } + + vgroup2 = file->private_data; + if (!vgroup2 || vgroup2 == vgroup || + vgroup2->container != vgroup->container) { + ret = -EINVAL; + goto out; + } + + list_for_each(pos, &vgroup2->device_list) { + vdev = list_entry(pos, struct vfio_device, next); + if (vdev->refcnt) { + ret = -EBUSY; + goto out; + } + } + + list_for_each(pos, &vgroup2->device_list) { + vdev = list_entry(pos, struct vfio_device, next); + iommu_detach_device(vgroup->container->iommu->domain, + vdev->dev); + vdev->iommu = NULL; + } + + vgroup2->container = vcontainer2; + vcontainer2->refcnt++; + vgroup->container->refcnt--; + vfio_container_reset_read(vgroup->container); +out: + fput(file); +out_noput: + if (ret) + kfree(vcontainer2); + mutex_unlock(&vfio.group_lock); + return ret; +} + +static int vfio_group_get_iommu_fd(struct vfio_group *vgroup) +{ + int ret = 0; + struct vfio_iommu *viommu; + + mutex_lock(&vfio.group_lock); + + if (!vgroup->container->iommu) { + ret = __vfio_open_iommu(vgroup->container); + if (ret) + goto out; + } + + viommu = vgroup->container->iommu; + + if (!viommu->file) { + viommu->file = anon_inode_getfile("vfio-iommu", + &vfio_iommu_fops, + viommu, O_RDWR); + if (IS_ERR(viommu->file)) { + ret = PTR_ERR(viommu->file); + viommu->file = NULL; + goto out; + } + } + ret = get_unused_fd(); + if (ret < 0) + goto out; + + fd_install(ret, viommu->file); + + vgroup->container->iommu->refcnt++; +out: + mutex_unlock(&vfio.group_lock); + return ret; +} + +static int vfio_group_get_device_fd(struct vfio_group *vgroup, char *buf) +{ + struct vfio_container *vcontainer = vgroup->container; + struct list_head *gpos, *dpos; + int ret = -ENODEV; + + mutex_lock(&vfio.group_lock); + + if (!vcontainer->iommu) { + ret = __vfio_open_iommu(vcontainer); + if (ret) + goto out; + } + + list_for_each(gpos, &vfio.group_list) { + vgroup = list_entry(gpos, struct vfio_group, next); + if (vgroup->container != vcontainer) + continue; + + list_for_each(dpos, &vgroup->device_list) { + struct vfio_device *vdev; + char buf2[MAX_PATH]; + + vdev = list_entry(dpos, struct vfio_device, next); + + snprintf(buf2, MAX_PATH, "%s", dev_name(vdev->dev)); + + if (!strncmp(buf, buf2, MAX_PATH)) { + if (!vdev->file) { + vdev->file = anon_inode_getfile( + "vfio-device", + &vfio_device_fops, + vdev, O_RDWR); + if (IS_ERR(vdev->file)) { + ret = PTR_ERR(vdev->file); + vdev->file = NULL; + goto out; + } + } + ret = get_unused_fd(); + if (ret < 0) + goto out; + + fd_install(ret, vdev->file); + + vdev->refcnt++; + vcontainer->iommu->refcnt++; + goto out; + } + } + } +out: + mutex_unlock(&vfio.group_lock); + return ret; +} + +static long vfio_group_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct vfio_group *vgroup = filep->private_data; + + if (vgroup->mm != current->mm) + return -EIO; + + switch (cmd) { + case VFIO_GROUP_MERGE: + case VFIO_GROUP_UNMERGE: + { + int fd; + + if (get_user(fd, (int __user *)arg)) + return -EFAULT; + if (fd < 0) + return -EINVAL; + + if (cmd == VFIO_GROUP_MERGE) + return vfio_group_merge(vgroup, fd); + else + return vfio_group_unmerge(vgroup, fd); + } + case VFIO_GROUP_GET_IOMMU_FD: + return vfio_group_get_iommu_fd(vgroup); + case VFIO_GROUP_GET_DEVICE_FD: + { + char *buf; + int ret; + + buf = strndup_user((const char __user *)arg, MAX_PATH); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + ret = vfio_group_get_device_fd(vgroup, buf); + kfree(buf); + return ret; + } + } + return -ENOSYS; +} + + +#ifdef CONFIG_COMPAT +static long vfio_group_compat_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + arg = (unsigned long)compat_ptr(arg); + return vfio_group_unl_ioctl(filep, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + +static int vfio_group_open(struct inode *inode, struct file *filep) +{ + struct vfio_group *vgroup; + int ret = 0; + + mutex_lock(&vfio.group_lock); + + vgroup = idr_find(&vfio.idr, iminor(inode)); + + if (!vgroup) { + ret = -ENODEV; + goto out; + } + + if (!vgroup->refcnt) { + struct vfio_container *vcontainer; + vcontainer = kzalloc(sizeof(*vcontainer), GFP_KERNEL); + if (!vcontainer) { + ret = -ENOMEM; + goto out; + } + vgroup->container = vcontainer; + vgroup->mm = current->mm; + } else if (current->mm != vgroup->mm) { + ret = -EBUSY; + goto out; + } + filep->private_data = vgroup; + vgroup->refcnt++; + vgroup->container->refcnt++; +out: + mutex_unlock(&vfio.group_lock); + + return ret; +} + +static int vfio_group_release(struct inode *inode, struct file *filep) +{ + struct vfio_group *vgroup = filep->private_data; + struct vfio_container *vcontainer = vgroup->container; + struct list_head *pos; + int ret = 0; + + mutex_lock(&vfio.group_lock); + + if (vgroup->refcnt > 1) { + vgroup->refcnt--; + vcontainer->refcnt--; + goto out; + } + + list_for_each(pos, &vgroup->device_list) { + struct vfio_device *vdev; + vdev = list_entry(pos, struct vfio_device, next); + if (vdev->refcnt) { + ret = -EBUSY; + goto out; + } + } + + /* Merged group? */ + if (vcontainer->refcnt > 1) { + if (vcontainer->iommu) { + list_for_each(pos, &vgroup->device_list) { + struct vfio_device *vdev; + vdev = list_entry(pos, + struct vfio_device, next); + iommu_detach_device(vcontainer->iommu->domain, + vdev->dev); + vdev->iommu = NULL; + } + } + vcontainer->refcnt--; + vfio_container_reset_read(vcontainer); + } else { + if (vcontainer->iommu && vcontainer->iommu->refcnt) { + ret = -EBUSY; + goto out; + } + + ret = __vfio_close_iommu(vcontainer); + if (ret) + goto out; + + kfree(vcontainer->read_buf); + kfree(vcontainer); + } + + vgroup->refcnt--; + vgroup->mm = NULL; + vgroup->container = NULL; + + /* Possible we had the group open while device members were removed */ + if (list_empty(&vgroup->device_list)) { + device_destroy(vfio.class, vgroup->devt); + idr_remove(&vfio.idr, MINOR(vgroup->devt)); + list_del(&vgroup->next); + kfree(vgroup); + } +out: + mutex_unlock(&vfio.group_lock); + return 0; +} + +static int __vfio_container_create_read_buf(struct vfio_container *vcontainer) +{ + struct list_head *gpos, *dpos; + struct vfio_group *vgroup; + struct vfio_device *vdev; + int off = 0; + char *buf; + + buf = kzalloc(MAX_PATH, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + list_for_each(gpos, &vfio.group_list) { + vgroup = list_entry(gpos, struct vfio_group, next); + if (vgroup->container != vcontainer) + continue; + + off += snprintf(buf + off, MAX_PATH, + "group: %u\n", vgroup->group); + buf = krealloc(buf, off + MAX_PATH, GFP_KERNEL); + if (!buf) + return -ENOMEM; + memset(buf + off, 0, MAX_PATH); + + list_for_each(dpos, &vgroup->device_list) { + vdev = list_entry(dpos, struct vfio_device, next); + + off += snprintf(buf + off, MAX_PATH, + "device: %s\n", dev_name(vdev->dev)); + buf = krealloc(buf, off + MAX_PATH, GFP_KERNEL); + if (!buf) + return -ENOMEM; + memset(buf + off, 0, MAX_PATH); + } + } + buf = krealloc(buf, off + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + vcontainer->read_buf = buf; + return 0; +} + +static ssize_t vfio_group_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + struct vfio_group *vgroup = filep->private_data; + struct vfio_container *vcontainer; + ssize_t ret = 0; + + mutex_lock(&vfio.group_lock); + + vcontainer = vgroup->container; + + if (!vcontainer) { + ret = -EINVAL; + goto out; + } + + if (!vcontainer->read_buf) { + ret = __vfio_container_create_read_buf(vcontainer); + if (ret) + goto out; + } + + if (*ppos >= strlen(vcontainer->read_buf) + 1) { + ret = 0; + goto out; + } + + if (*ppos + count > strlen(vcontainer->read_buf) + 1) + count = strlen(vcontainer->read_buf) + 1 - *ppos; + + if (copy_to_user(buf, vcontainer->read_buf + *ppos, count)) { + ret = -EFAULT; + goto out; + } + + *ppos += count; + ret = count; +out: + mutex_unlock(&vfio.group_lock); + return ret; +} + +static const struct file_operations vfio_group_fops = { + .owner = THIS_MODULE, + .open = vfio_group_open, + .release = vfio_group_release, + .read = vfio_group_read, + .unlocked_ioctl = vfio_group_unl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vfio_group_compat_ioctl, +#endif +}; + +static void vfio_class_release(struct kref *kref) +{ + class_destroy(vfio.class); + vfio.class = NULL; +} + +static char *vfio_devnode(struct device *dev, mode_t *mode) +{ + return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); +} + +static int __init vfio_init(void) +{ + int ret; + + idr_init(&vfio.idr); + mutex_init(&vfio.group_lock); + INIT_LIST_HEAD(&vfio.group_list); + + kref_init(&vfio.kref); + vfio.class = class_create(THIS_MODULE, "vfio"); + if (IS_ERR(vfio.class)) { + ret = PTR_ERR(vfio.class); + goto err_class; + } + + vfio.class->devnode = vfio_devnode; + + /* FIXME - how many minors to allocate... all of them! */ + ret = alloc_chrdev_region(&vfio.devt, 0, MINORMASK, "vfio"); + if (ret) + goto err_chrdev; + + cdev_init(&vfio.cdev, &vfio_group_fops); + ret = cdev_add(&vfio.cdev, vfio.devt, MINORMASK); + if (ret) + goto err_cdev; + + pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); + + return 0; + +err_cdev: + unregister_chrdev_region(vfio.devt, MINORMASK); +err_chrdev: + kref_put(&vfio.kref, vfio_class_release); +err_class: + return ret; +} + +static void __exit vfio_cleanup(void) +{ + struct list_head *gpos, *gppos; + + list_for_each_safe(gpos, gppos, &vfio.group_list) { + struct vfio_group *vgroup; + struct list_head *dpos, *dppos; + + vgroup = list_entry(gpos, struct vfio_group, next); + + list_for_each_safe(dpos, dppos, &vgroup->device_list) { + struct vfio_device *vdev; + + vdev = list_entry(dpos, struct vfio_device, next); + vfio_group_del_dev(vdev->dev); + } + } + + idr_destroy(&vfio.idr); + cdev_del(&vfio.cdev); + unregister_chrdev_region(vfio.devt, MINORMASK); + kref_put(&vfio.kref, vfio_class_release); +} + +module_init(vfio_init); +module_exit(vfio_cleanup); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/vfio_private.h b/drivers/vfio/vfio_private.h new file mode 100644 index 0000000..2cc300c --- /dev/null +++ b/drivers/vfio/vfio_private.h @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, pugs@cisco.com + */ + +#include <linux/cdev.h> +#include <linux/device.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/idr.h> +#include <linux/iommu.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/mutex.h> + +#ifndef VFIO_PRIVATE_H +#define VFIO_PRIVATE_H + +extern const struct file_operations vfio_iommu_fops; +extern const struct file_operations vfio_device_fops; + +struct vfio { + dev_t devt; + struct cdev cdev; + struct list_head group_list; + struct mutex group_lock; + struct kref kref; + struct class *class; + struct idr idr; +}; + +struct vfio_device_ops { + struct vfio_device *(* new)(struct device *); + void (* free)(struct vfio_device *); + struct file_operations fops; +}; + +struct vfio_iommu { + struct iommu_domain *domain; + struct vfio *vfio; + int refcnt; + struct file *file; +}; + +struct vfio_device { + struct device *dev; + struct list_head next; + struct file *file; + struct vfio_device_ops *ops; + struct vfio *vfio; + struct vfio_iommu *iommu; + int refcnt; +}; + +struct vfio_container { + struct vfio_iommu *iommu; + char *read_buf; + int refcnt; +}; + +struct vfio_group { + dev_t devt; + unsigned int group; + int refcnt; + struct mm_struct *mm; + struct vfio_container *container; + struct list_head device_list; + struct list_head next; +}; + +extern int vfio_group_add_dev(struct device *dev, void *data); +extern void vfio_group_del_dev(struct device *dev); + +#endif /* VFIO_PRIVATE_H */

[RFC,3/5] VFIO: Base framework for new VFIO driver

Commit Message

Comments

Patch