[v7,2/4] vfio: VFIO driver for mediated devices

Message ID	1472097235-6332-3-git-send-email-kwankhede@nvidia.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@kernel.org> From: Kirti Wankhede <kwankhede@nvidia.com> To: <alex.williamson@redhat.com>, <pbonzini@redhat.com>, <kraxel@redhat.com>, <cjia@nvidia.com> CC: <qemu-devel@nongnu.org>, <kvm@vger.kernel.org>, <kevin.tian@intel.com>, <jike.song@intel.com>, <bjsdjshi@linux.vnet.ibm.com>, Kirti Wankhede <kwankhede@nvidia.com> Subject: [PATCH v7 2/4] vfio: VFIO driver for mediated devices Date: Thu, 25 Aug 2016 09:23:53 +0530 Message-ID: <1472097235-6332-3-git-send-email-kwankhede@nvidia.com> In-Reply-To: <1472097235-6332-1-git-send-email-kwankhede@nvidia.com> References: <1472097235-6332-1-git-send-email-kwankhede@nvidia.com> MIME-Version: 1.0 Content-Type: text/plain Sender: kvm-owner@vger.kernel.org Precedence: bulk

diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
index a34fbc66f92f..703abd0a9bff 100644
--- a/drivers/vfio/mdev/Kconfig
+++ b/drivers/vfio/mdev/Kconfig
@@ -9,4 +9,10 @@ config VFIO_MDEV
 
 	  If you don't know what do here, say N.
 
+config VFIO_MDEV_DEVICE
+	tristate "VFIO support for Mediated devices"
+	depends on VFIO && VFIO_MDEV
+	default n
+	help
+	  VFIO based driver for mediated devices.
 
diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
index 56a75e689582..e5087ed83a34 100644
--- a/drivers/vfio/mdev/Makefile
+++ b/drivers/vfio/mdev/Makefile
@@ -2,4 +2,5 @@
 mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
 
 obj-$(CONFIG_VFIO_MDEV) += mdev.o
+obj-$(CONFIG_VFIO_MDEV_DEVICE) += vfio_mdev.o
 
diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
new file mode 100644
index 000000000000..28f13aeaa46b
--- /dev/null
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -0,0 +1,543 @@
+/*
+ * VFIO based Mediated PCI device driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/mdev.h>
+
+#include "mdev_private.h"
+
+#define DRIVER_VERSION  "0.1"
+#define DRIVER_AUTHOR   "NVIDIA Corporation"
+#define DRIVER_DESC     "VFIO based Mediated PCI device driver"
+
+/*
+ * Per-device state passed to the VFIO core as device_data.
+ *
+ * @group:    copied from mdev->group at probe time
+ * @mdev:     the mediated device whose parent ops back this VFIO device
+ * @dev_info: copy of the last successful VFIO_DEVICE_GET_INFO result; its
+ *            num_irqs bounds the index accepted by the IRQ ioctls
+ */
+struct vfio_mdev {
+	struct iommu_group *group;
+	struct mdev_device *mdev;
+	struct vfio_device_info dev_info;
+};
+
+/* Open callback: takes a module reference, -ENODEV if that fails. */
+static int vfio_mdev_open(void *device_data)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	return 0;
+}
+
+/* Release callback: drops the module reference taken at open. */
+static void vfio_mdev_close(void *device_data)
+{
+	module_put(THIS_MODULE);
+}
+
+/*
+ * Append a sparse-mmap capability to @caps.
+ *
+ * @cap_type points to the struct vfio_region_info_cap_sparse_mmap filled in
+ * by the parent driver; its nr_areas and areas[] are copied into the new
+ * chain entry.  Returns 0, or the error encoded by vfio_info_cap_add().
+ */
+static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
+	size_t size;
+
+	size = sizeof(*sparse) + sparse->nr_areas * sizeof(*sparse->areas);
+	header = vfio_info_cap_add(caps, size,
+				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	sparse_cap = container_of(header,
+			struct vfio_region_info_cap_sparse_mmap, header);
+	sparse_cap->nr_areas = sparse->nr_areas;
+	memcpy(sparse_cap->areas, sparse->areas,
+	       sparse->nr_areas * sizeof(*sparse->areas));
+	return 0;
+}
+
+/*
+ * Append a region-type capability to @caps.
+ *
+ * @cap_type points to the struct vfio_region_info_cap_type filled in by the
+ * parent driver.  Returns 0, or the error encoded by vfio_info_cap_add().
+ */
+static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
+
+	header = vfio_info_cap_add(caps, sizeof(*cap),
+				   VFIO_REGION_INFO_CAP_TYPE, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	type_cap = container_of(header, struct vfio_region_info_cap_type,
+				header);
+	type_cap->type = cap->type;
+	type_cap->subtype = cap->subtype;
+	return 0;
+}
+
+/*
+ * ioctl callback: validates the user argument and forwards each VFIO device
+ * ioctl to the matching parent op.
+ *
+ * Returns -EINVAL for a short argsz or a missing parent op, -EFAULT when a
+ * user copy fails, the parent op's error otherwise, and -ENOTTY for any
+ * command not handled here.
+ */
+static long vfio_mdev_unlocked_ioctl(void *device_data,
+				     unsigned int cmd, unsigned long arg)
+{
+	int ret = 0;
+	struct vfio_mdev *vmdev = device_data;
+	struct parent_device *parent = vmdev->mdev->parent;
+	unsigned long minsz;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (parent->ops->get_device_info)
+			ret = parent->ops->get_device_info(vmdev->mdev, &info);
+		else
+			return -EINVAL;
+
+		if (ret)
+			return ret;
+
+		if (parent->ops->reset)
+			info.flags |= VFIO_DEVICE_FLAGS_RESET;
+
+		/* Cached so the IRQ ioctls can range-check their index. */
+		memcpy(&vmdev->dev_info, &info, sizeof(info));
+
+		/* copy_to_user() returns bytes not copied, not an errno. */
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		u16 cap_type_id = 0;
+		void *cap_type = NULL;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (parent->ops->get_region_info)
+			ret = parent->ops->get_region_info(vmdev->mdev, &info,
+						&cap_type_id, &cap_type);
+		else
+			return -EINVAL;
+
+		if (ret)
+			return ret;
+
+		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && cap_type) {
+			switch (cap_type_id) {
+			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
+				ret = sparse_mmap_cap(&caps, cap_type);
+				if (ret)
+					return ret;
+				break;
+
+			case VFIO_REGION_INFO_CAP_TYPE:
+				ret = region_type_cap(&caps, cap_type);
+				if (ret)
+					return ret;
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+
+		if (caps.size) {
+			if (info.argsz < sizeof(info) + caps.size) {
+				/* Buffer too small: report needed size. */
+				info.argsz = sizeof(info) + caps.size;
+				info.cap_offset = 0;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				if (copy_to_user((void __user *)arg +
+						 sizeof(info), caps.buf,
+						 caps.size)) {
+					kfree(caps.buf);
+					return -EFAULT;
+				}
+				info.cap_offset = sizeof(info);
+			}
+			kfree(caps.buf);
+		}
+
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if ((info.argsz < minsz) ||
+		    (info.index >= vmdev->dev_info.num_irqs))
+			return -EINVAL;
+
+		if (parent->ops->get_irq_info)
+			ret = parent->ops->get_irq_info(vmdev->mdev, &info);
+		else
+			return -EINVAL;
+
+		if (ret)
+			return ret;
+
+		if (info.count == -1)
+			return -EINVAL;
+
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+	case VFIO_DEVICE_SET_IRQS:
+	{
+		struct vfio_irq_set hdr;
+		u8 *data = NULL;
+
+		minsz = offsetofend(struct vfio_irq_set, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if ((hdr.argsz < minsz) ||
+		    (hdr.index >= vmdev->dev_info.num_irqs) ||
+		    (hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
+				   VFIO_IRQ_SET_ACTION_TYPE_MASK)))
+			return -EINVAL;
+
+		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
+			size_t size;
+
+			if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
+				size = sizeof(uint8_t);
+			else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
+				size = sizeof(int32_t);
+			else
+				return -EINVAL;
+
+			/* Keep count * size from wrapping a 32-bit size_t. */
+			if (hdr.count > U32_MAX / size ||
+			    hdr.argsz - minsz < hdr.count * size)
+				return -EINVAL;
+
+			data = memdup_user((void __user *)(arg + minsz),
+					   hdr.count * size);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		}
+
+		if (parent->ops->set_irqs)
+			ret = parent->ops->set_irqs(vmdev->mdev, hdr.flags,
+						    hdr.index, hdr.start,
+						    hdr.count, data);
+		else
+			ret = -EINVAL;
+
+		kfree(data);
+		return ret;
+	}
+	case VFIO_DEVICE_RESET:
+	{
+		if (parent->ops->reset)
+			return parent->ops->reset(vmdev->mdev);
+
+		return -EINVAL;
+	}
+	}
+	return -ENOTTY;
+}
+
+/*
+ * Read callback: copies @count bytes starting at *@ppos to @buf through the
+ * parent's read op, issuing the widest naturally aligned access (4, 2 or 1
+ * bytes) that fits at each step.
+ *
+ * Returns the number of bytes transferred, -EINVAL if the parent has no
+ * read op, the parent's negative error if it reports one, and -EFAULT when
+ * the parent returns 0 or the copy to user space fails.
+ */
+static ssize_t vfio_mdev_read(void *device_data, char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct vfio_mdev *vmdev = device_data;
+	struct mdev_device *mdev = vmdev->mdev;
+	struct parent_device *parent = mdev->parent;
+	unsigned int done = 0;
+	int ret = 0;
+
+	if (!parent->ops->read)
+		return -EINVAL;
+
+	while (count) {
+		size_t filled;
+
+		if (count >= 4 && !(*ppos % 4)) {
+			u32 val;
+
+			ret = parent->ops->read(mdev, (char *)&val, sizeof(val),
+						*ppos);
+			if (ret <= 0)
+				goto read_err;
+
+			if (copy_to_user(buf, &val, sizeof(val)))
+				goto read_err;
+
+			filled = 4;
+		} else if (count >= 2 && !(*ppos % 2)) {
+			u16 val;
+
+			ret = parent->ops->read(mdev, (char *)&val, sizeof(val),
+						*ppos);
+			if (ret <= 0)
+				goto read_err;
+
+			if (copy_to_user(buf, &val, sizeof(val)))
+				goto read_err;
+
+			filled = 2;
+		} else {
+			u8 val;
+
+			ret = parent->ops->read(mdev, &val, sizeof(val), *ppos);
+			if (ret <= 0)
+				goto read_err;
+
+			if (copy_to_user(buf, &val, sizeof(val)))
+				goto read_err;
+
+			filled = 1;
+		}
+
+		count -= filled;
+		done += filled;
+		*ppos += filled;
+		buf += filled;
+	}
+
+	return done;
+
+read_err:
+	/* Pass on the parent's errno; other failures stay -EFAULT. */
+	return ret < 0 ? ret : -EFAULT;
+}
+
+/*
+ * Write callback: copies @count bytes from @buf to the device at *@ppos
+ * through the parent's write op, using the same 4/2/1-byte splitting as
+ * vfio_mdev_read().
+ *
+ * Returns the number of bytes transferred, -EINVAL if the parent has no
+ * write op, the parent's negative error if it reports one, and -EFAULT when
+ * the parent returns 0 or the copy from user space fails.
+ */
+static ssize_t vfio_mdev_write(void *device_data, const char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct vfio_mdev *vmdev = device_data;
+	struct mdev_device *mdev = vmdev->mdev;
+	struct parent_device *parent = mdev->parent;
+	unsigned int done = 0;
+	int ret = 0;
+
+	if (!parent->ops->write)
+		return -EINVAL;
+
+	while (count) {
+		size_t filled;
+
+		if (count >= 4 && !(*ppos % 4)) {
+			u32 val;
+
+			if (copy_from_user(&val, buf, sizeof(val)))
+				goto write_err;
+
+			ret = parent->ops->write(mdev, (char *)&val,
+						 sizeof(val), *ppos);
+			if (ret <= 0)
+				goto write_err;
+
+			filled = 4;
+		} else if (count >= 2 && !(*ppos % 2)) {
+			u16 val;
+
+			if (copy_from_user(&val, buf, sizeof(val)))
+				goto write_err;
+
+			ret = parent->ops->write(mdev, (char *)&val,
+						 sizeof(val), *ppos);
+			if (ret <= 0)
+				goto write_err;
+
+			filled = 2;
+		} else {
+			u8 val;
+
+			if (copy_from_user(&val, buf, sizeof(val)))
+				goto write_err;
+
+			ret = parent->ops->write(mdev, &val, sizeof(val),
+						 *ppos);
+			if (ret <= 0)
+				goto write_err;
+
+			filled = 1;
+		}
+
+		count -= filled;
+		done += filled;
+		*ppos += filled;
+		buf += filled;
+	}
+
+	return done;
+write_err:
+	/* Pass on the parent's errno; other failures stay -EFAULT. */
+	return ret < 0 ? ret : -EFAULT;
+}
+
+/* mmap callback: forwards to the parent's mmap op, -EINVAL if absent. */
+static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma)
+{
+	struct vfio_mdev *vmdev = device_data;
+	struct mdev_device *mdev = vmdev->mdev;
+	struct parent_device *parent = mdev->parent;
+
+	if (parent->ops->mmap)
+		return parent->ops->mmap(mdev, vma);
+
+	return -EINVAL;
+}
+
+/* Callbacks registered with the VFIO core for each probed mdev. */
+static const struct vfio_device_ops vfio_mdev_dev_ops = {
+	.name		= "vfio-mdev",
+	.open		= vfio_mdev_open,
+	.release	= vfio_mdev_close,
+	.ioctl		= vfio_mdev_unlocked_ioctl,
+	.read		= vfio_mdev_read,
+	.write		= vfio_mdev_write,
+	.mmap		= vfio_mdev_mmap,
+};
+
+/*
+ * Driver probe: allocates the per-device state and registers @dev with the
+ * VFIO core.  Returns 0, -ENOMEM if the allocation fails, or the error from
+ * vfio_add_group_dev().
+ */
+int vfio_mdev_probe(struct device *dev)
+{
+	struct vfio_mdev *vmdev;
+	struct mdev_device *mdev = to_mdev_device(dev);
+	int ret;
+
+	vmdev = kzalloc(sizeof(*vmdev), GFP_KERNEL);
+	/* kzalloc() reports failure with NULL, never an ERR_PTR. */
+	if (!vmdev)
+		return -ENOMEM;
+
+	vmdev->mdev = mdev_get_device(mdev);
+	vmdev->group = mdev->group;
+
+	ret = vfio_add_group_dev(dev, &vfio_mdev_dev_ops, vmdev);
+	if (ret)
+		kfree(vmdev);
+
+	/*
+	 * NOTE(review): mdev_put_device() runs even on success although
+	 * vmdev->mdev keeps the pointer -- confirm the get/put pairing.
+	 */
+	mdev_put_device(mdev);
+	return ret;
+}
+
+/* Driver remove: unregisters @dev from the VFIO core and frees its state. */
+void vfio_mdev_remove(struct device *dev)
+{
+	struct vfio_mdev *vmdev;
+
+	vmdev = vfio_del_group_dev(dev);
+	kfree(vmdev);
+}
+
+struct mdev_driver vfio_mdev_driver = {
+	.name	= "vfio_mdev",
+	.probe	= vfio_mdev_probe,
+	.remove	= vfio_mdev_remove,
+};
+
+static int __init vfio_mdev_init(void)
+{
+	return mdev_register_driver(&vfio_mdev_driver, THIS_MODULE);
+}
+
+static void __exit vfio_mdev_exit(void)
+{
+	mdev_unregister_driver(&vfio_mdev_driver);
+}
+
+module_init(vfio_mdev_init)
+module_exit(vfio_mdev_exit)
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 016c14a1b454..776cc2b063d4 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -21,9 +21,9 @@
 
 #define VFIO_PCI_OFFSET_SHIFT 40
 
-#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
-#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
-#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
+#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
 
 /* Special capability IDs predefined access */
 #define PCI_CAP_ID_INVALID 0xFF /* default raw access */

[v7,2/4] vfio: VFIO driver for mediated devices

Commit Message

Comments

Patch