diff mbox

[v9,01/12] vfio: Mediated device Core driver

Message ID 1476739332-4911-2-git-send-email-kwankhede@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kirti Wankhede Oct. 17, 2016, 9:22 p.m. UTC
Design for Mediated Device Driver:
Main purpose of this driver is to provide a common interface for mediated
device management that can be used by different drivers of different
devices.

This module provides a generic interface to create the device, add it to
mediated bus, add device to IOMMU group and then add it to vfio group.

Below is the high Level block diagram, with Nvidia, Intel and IBM devices
as example, since these are the devices which are going to actively use
this module as of now.

 +---------------+
 |               |
 | +-----------+ |  mdev_register_driver() +--------------+
 | |           | +<------------------------+ __init()     |
 | |  mdev     | |                         |              |
 | |  bus      | +------------------------>+              |<-> VFIO user
 | |  driver   | |     probe()/remove()    | vfio_mdev.ko |    APIs
 | |           | |                         |              |
 | +-----------+ |                         +--------------+
 |               |
 |  MDEV CORE    |
 |   MODULE      |
 |   mdev.ko     |
 | +-----------+ |  mdev_register_device() +--------------+
 | |           | +<------------------------+              |
 | |           | |                         |  nvidia.ko   |<-> physical
 | |           | +------------------------>+              |    device
 | |           | |        callback         +--------------+
 | | Physical  | |
 | |  device   | |  mdev_register_device() +--------------+
 | | interface | |<------------------------+              |
 | |           | |                         |  i915.ko     |<-> physical
 | |           | +------------------------>+              |    device
 | |           | |        callback         +--------------+
 | |           | |
 | |           | |  mdev_register_device() +--------------+
 | |           | +<------------------------+              |
 | |           | |                         | ccw_device.ko|<-> physical
 | |           | +------------------------>+              |    device
 | |           | |        callback         +--------------+
 | +-----------+ |
 +---------------+

Core driver provides two types of registration interfaces:
1. Registration interface for mediated bus driver:

/**
  * struct mdev_driver - Mediated device's driver
  * @name: driver name
  * @probe: called when new device created
  * @remove:called when device removed
  * @driver:device driver structure
  *
  **/
struct mdev_driver {
         const char *name;
         int  (*probe)  (struct device *dev);
         void (*remove) (struct device *dev);
         struct device_driver    driver;
};

Mediated bus driver for mdev device should use this interface to register
and unregister with core driver respectively:

int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
void mdev_unregister_driver(struct mdev_driver *drv);

Medisted bus driver is responsible to add/delete mediated devices to/from
VFIO group when devices are bound and unbound to the driver.

2. Physical device driver interface
This interface provides vendor driver the set APIs to manage physical
device related work in its driver. APIs are :

* dev_attr_groups: attributes of the parent device.
* mdev_attr_groups: attributes of the mediated device.
* supported_type_groups: attributes to define supported type. This is
			 mandatory field.
* create: to allocate basic resources in driver for a mediated device.
* remove: to free resources in driver when mediated device is destroyed.
* open: open callback of mediated device
* release: release callback of mediated device
* read : read emulation callback.
* write: write emulation callback.
* mmap: mmap emulation callback.
* ioctl: ioctl callback.

Drivers should use these interfaces to register and unregister device to
mdev core driver respectively:

extern int  mdev_register_device(struct device *dev,
                                 const struct parent_ops *ops);
extern void mdev_unregister_device(struct device *dev);

There are no locks to serialize above callbacks in mdev driver and
vfio_mdev driver. If required, vendor driver can have locks to serialize
above APIs in their driver.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
Change-Id: I73a5084574270b14541c529461ea2f03c292d510
---
 drivers/vfio/Kconfig             |   1 +
 drivers/vfio/Makefile            |   1 +
 drivers/vfio/mdev/Kconfig        |  11 ++
 drivers/vfio/mdev/Makefile       |   4 +
 drivers/vfio/mdev/mdev_core.c    | 372 +++++++++++++++++++++++++++++++++++++++
 drivers/vfio/mdev/mdev_driver.c  | 128 ++++++++++++++
 drivers/vfio/mdev/mdev_private.h |  41 +++++
 drivers/vfio/mdev/mdev_sysfs.c   | 296 +++++++++++++++++++++++++++++++
 include/linux/mdev.h             | 177 +++++++++++++++++++
 9 files changed, 1031 insertions(+)
 create mode 100644 drivers/vfio/mdev/Kconfig
 create mode 100644 drivers/vfio/mdev/Makefile
 create mode 100644 drivers/vfio/mdev/mdev_core.c
 create mode 100644 drivers/vfio/mdev/mdev_driver.c
 create mode 100644 drivers/vfio/mdev/mdev_private.h
 create mode 100644 drivers/vfio/mdev/mdev_sysfs.c
 create mode 100644 include/linux/mdev.h

Comments

Alex Williamson Oct. 18, 2016, 11:16 p.m. UTC | #1
On Tue, 18 Oct 2016 02:52:01 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> Design for Mediated Device Driver:
> Main purpose of this driver is to provide a common interface for mediated
> device management that can be used by different drivers of different
> devices.
> 
> This module provides a generic interface to create the device, add it to
> mediated bus, add device to IOMMU group and then add it to vfio group.
> 
> Below is the high Level block diagram, with Nvidia, Intel and IBM devices
> as example, since these are the devices which are going to actively use
> this module as of now.
> 
>  +---------------+
>  |               |
>  | +-----------+ |  mdev_register_driver() +--------------+
>  | |           | +<------------------------+ __init()     |
>  | |  mdev     | |                         |              |
>  | |  bus      | +------------------------>+              |<-> VFIO user
>  | |  driver   | |     probe()/remove()    | vfio_mdev.ko |    APIs
>  | |           | |                         |              |
>  | +-----------+ |                         +--------------+
>  |               |
>  |  MDEV CORE    |
>  |   MODULE      |
>  |   mdev.ko     |
>  | +-----------+ |  mdev_register_device() +--------------+
>  | |           | +<------------------------+              |
>  | |           | |                         |  nvidia.ko   |<-> physical
>  | |           | +------------------------>+              |    device
>  | |           | |        callback         +--------------+
>  | | Physical  | |
>  | |  device   | |  mdev_register_device() +--------------+
>  | | interface | |<------------------------+              |
>  | |           | |                         |  i915.ko     |<-> physical
>  | |           | +------------------------>+              |    device
>  | |           | |        callback         +--------------+
>  | |           | |
>  | |           | |  mdev_register_device() +--------------+
>  | |           | +<------------------------+              |
>  | |           | |                         | ccw_device.ko|<-> physical
>  | |           | +------------------------>+              |    device
>  | |           | |        callback         +--------------+
>  | +-----------+ |
>  +---------------+
> 
> Core driver provides two types of registration interfaces:
> 1. Registration interface for mediated bus driver:
> 
> /**
>   * struct mdev_driver - Mediated device's driver
>   * @name: driver name
>   * @probe: called when new device created
>   * @remove:called when device removed
>   * @driver:device driver structure
>   *
>   **/
> struct mdev_driver {
>          const char *name;
>          int  (*probe)  (struct device *dev);
>          void (*remove) (struct device *dev);
>          struct device_driver    driver;
> };
> 
> Mediated bus driver for mdev device should use this interface to register
> and unregister with core driver respectively:
> 
> int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
> void mdev_unregister_driver(struct mdev_driver *drv);
> 
> Medisted bus driver is responsible to add/delete mediated devices to/from
> VFIO group when devices are bound and unbound to the driver.
> 
> 2. Physical device driver interface
> This interface provides vendor driver the set APIs to manage physical
> device related work in its driver. APIs are :
> 
> * dev_attr_groups: attributes of the parent device.
> * mdev_attr_groups: attributes of the mediated device.
> * supported_type_groups: attributes to define supported type. This is
> 			 mandatory field.
> * create: to allocate basic resources in driver for a mediated device.
> * remove: to free resources in driver when mediated device is destroyed.
> * open: open callback of mediated device
> * release: release callback of mediated device
> * read : read emulation callback.
> * write: write emulation callback.
> * mmap: mmap emulation callback.
> * ioctl: ioctl callback.
> 
> Drivers should use these interfaces to register and unregister device to
> mdev core driver respectively:
> 
> extern int  mdev_register_device(struct device *dev,
>                                  const struct parent_ops *ops);
> extern void mdev_unregister_device(struct device *dev);
> 
> There are no locks to serialize above callbacks in mdev driver and
> vfio_mdev driver. If required, vendor driver can have locks to serialize
> above APIs in their driver.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Signed-off-by: Neo Jia <cjia@nvidia.com>
> Change-Id: I73a5084574270b14541c529461ea2f03c292d510
> ---
>  drivers/vfio/Kconfig             |   1 +
>  drivers/vfio/Makefile            |   1 +
>  drivers/vfio/mdev/Kconfig        |  11 ++
>  drivers/vfio/mdev/Makefile       |   4 +
>  drivers/vfio/mdev/mdev_core.c    | 372 +++++++++++++++++++++++++++++++++++++++
>  drivers/vfio/mdev/mdev_driver.c  | 128 ++++++++++++++
>  drivers/vfio/mdev/mdev_private.h |  41 +++++
>  drivers/vfio/mdev/mdev_sysfs.c   | 296 +++++++++++++++++++++++++++++++
>  include/linux/mdev.h             | 177 +++++++++++++++++++
>  9 files changed, 1031 insertions(+)
>  create mode 100644 drivers/vfio/mdev/Kconfig
>  create mode 100644 drivers/vfio/mdev/Makefile
>  create mode 100644 drivers/vfio/mdev/mdev_core.c
>  create mode 100644 drivers/vfio/mdev/mdev_driver.c
>  create mode 100644 drivers/vfio/mdev/mdev_private.h
>  create mode 100644 drivers/vfio/mdev/mdev_sysfs.c
>  create mode 100644 include/linux/mdev.h
> 
> diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
> index da6e2ce77495..23eced02aaf6 100644
> --- a/drivers/vfio/Kconfig
> +++ b/drivers/vfio/Kconfig
> @@ -48,4 +48,5 @@ menuconfig VFIO_NOIOMMU
>  
>  source "drivers/vfio/pci/Kconfig"
>  source "drivers/vfio/platform/Kconfig"
> +source "drivers/vfio/mdev/Kconfig"
>  source "virt/lib/Kconfig"
> diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
> index 7b8a31f63fea..4a23c13b6be4 100644
> --- a/drivers/vfio/Makefile
> +++ b/drivers/vfio/Makefile
> @@ -7,3 +7,4 @@ obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
>  obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
>  obj-$(CONFIG_VFIO_PCI) += pci/
>  obj-$(CONFIG_VFIO_PLATFORM) += platform/
> +obj-$(CONFIG_VFIO_MDEV) += mdev/
> diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
> new file mode 100644
> index 000000000000..93addace9a67
> --- /dev/null
> +++ b/drivers/vfio/mdev/Kconfig
> @@ -0,0 +1,11 @@
> +
> +config VFIO_MDEV
> +    tristate "Mediated device driver framework"
> +    depends on VFIO
> +    default n
> +    help
> +        Provides a framework to virtualize devices which don't have SR_IOV
> +	capability built-in.
> +	See Documentation/vfio-mdev/vfio-mediated-device.txt for more details.
> +
> +        If you don't know what do here, say N.
> diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
> new file mode 100644
> index 000000000000..31bc04801d94
> --- /dev/null
> +++ b/drivers/vfio/mdev/Makefile
> @@ -0,0 +1,4 @@
> +
> +mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
> +
> +obj-$(CONFIG_VFIO_MDEV) += mdev.o
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> new file mode 100644
> index 000000000000..7db5ec164aeb
> --- /dev/null
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -0,0 +1,372 @@
> +/*
> + * Mediated device Core Driver
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <linux/uuid.h>
> +#include <linux/sysfs.h>
> +#include <linux/mdev.h>
> +
> +#include "mdev_private.h"
> +
> +#define DRIVER_VERSION		"0.1"
> +#define DRIVER_AUTHOR		"NVIDIA Corporation"
> +#define DRIVER_DESC		"Mediated device Core Driver"
> +
> +static LIST_HEAD(parent_list);
> +static DEFINE_MUTEX(parent_list_lock);
> +static struct class_compat *mdev_bus_compat_class;
> +
> +static int _find_mdev_device(struct device *dev, void *data)
> +{
> +	struct mdev_device *mdev;
> +
> +	if (!dev_is_mdev(dev))
> +		return 0;
> +
> +	mdev = to_mdev_device(dev);
> +
> +	if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static struct mdev_device *__find_mdev_device(struct parent_device *parent,
> +					      uuid_le uuid)
> +{
> +	struct device *dev;
> +
> +	dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
> +	if (!dev)
> +		return NULL;
> +
> +	put_device(dev);
> +
> +	return to_mdev_device(dev);
> +}

This function is only used by mdev_device_create() for the purpose of
checking whether a given uuid for a parent already exists, so the
returned device is not actually used.  However, at the point where
we're using to_mdev_device() here, we don't actually hold a reference to
the device, so that function call and any possible use of the returned
pointer by the callee is invalid.  I would either turn this into a
"get" function where the callee has a device reference and needs to do
a "put" on it or change this to a "exists" test where true/false is
returned and the function cannot be later mis-used to do a device
lookup where the reference isn't actually valid.

> +
> +/* Should be called holding parent_list_lock */
> +static struct parent_device *__find_parent_device(struct device *dev)
> +{
> +	struct parent_device *parent;
> +
> +	list_for_each_entry(parent, &parent_list, next) {
> +		if (parent->dev == dev)
> +			return parent;
> +	}
> +	return NULL;
> +}
> +
> +static void mdev_release_parent(struct kref *kref)
> +{
> +	struct parent_device *parent = container_of(kref, struct parent_device,
> +						    ref);
> +	struct device *dev = parent->dev;
> +
> +	kfree(parent);
> +	put_device(dev);
> +}
> +
> +static
> +inline struct parent_device *mdev_get_parent(struct parent_device *parent)
> +{
> +	if (parent)
> +		kref_get(&parent->ref);
> +
> +	return parent;
> +}
> +
> +static inline void mdev_put_parent(struct parent_device *parent)
> +{
> +	if (parent)
> +		kref_put(&parent->ref, mdev_release_parent);
> +}
> +
> +static int mdev_device_create_ops(struct kobject *kobj,
> +				  struct mdev_device *mdev)
> +{
> +	struct parent_device *parent = mdev->parent;
> +	int ret;
> +
> +	ret = parent->ops->create(kobj, mdev);
> +	if (ret)
> +		return ret;
> +
> +	ret = sysfs_create_groups(&mdev->dev.kobj,
> +				  parent->ops->mdev_attr_groups);
> +	if (ret)
> +		parent->ops->remove(mdev);
> +
> +	return ret;
> +}
> +
> +static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
> +{
> +	struct parent_device *parent = mdev->parent;
> +	int ret;
> +
> +	/*
> +	 * Vendor driver can return error if VMM or userspace application is
> +	 * using this mdev device.
> +	 */
> +	ret = parent->ops->remove(mdev);
> +	if (ret && !force_remove)
> +		return -EBUSY;
> +
> +	sysfs_remove_groups(&mdev->dev.kobj, parent->ops->mdev_attr_groups);
> +	return 0;
> +}
> +
> +static int mdev_device_remove_cb(struct device *dev, void *data)
> +{
> +	return mdev_device_remove(dev, data ? *(bool *)data : true);
> +}
> +
> +/*
> + * mdev_register_device : Register a device
> + * @dev: device structure representing parent device.
> + * @ops: Parent device operation structure to be registered.
> + *
> + * Add device to list of registered parent devices.
> + * Returns a negative value on error, otherwise 0.
> + */
> +int mdev_register_device(struct device *dev, const struct parent_ops *ops)
> +{
> +	int ret = 0;
> +	struct parent_device *parent;
> +
> +	/* check for mandatory ops */
> +	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
> +		return -EINVAL;
> +
> +	dev = get_device(dev);
> +	if (!dev)
> +		return -EINVAL;
> +
> +	mutex_lock(&parent_list_lock);
> +
> +	/* Check for duplicate */
> +	parent = __find_parent_device(dev);
> +	if (parent) {
> +		ret = -EEXIST;
> +		goto add_dev_err;
> +	}
> +
> +	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
> +	if (!parent) {
> +		ret = -ENOMEM;
> +		goto add_dev_err;
> +	}
> +
> +	kref_init(&parent->ref);
> +
> +	parent->dev = dev;
> +	parent->ops = ops;
> +
> +	ret = parent_create_sysfs_files(parent);
> +	if (ret) {
> +		mutex_unlock(&parent_list_lock);
> +		mdev_put_parent(parent);
> +		return ret;
> +	}
> +
> +	ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
> +	if (ret)
> +		dev_warn(dev, "Failed to create compatibility class link\n");
> +
> +	list_add(&parent->next, &parent_list);
> +	mutex_unlock(&parent_list_lock);
> +
> +	dev_info(dev, "MDEV: Registered\n");
> +	return 0;
> +
> +add_dev_err:
> +	mutex_unlock(&parent_list_lock);
> +	put_device(dev);
> +	return ret;
> +}
> +EXPORT_SYMBOL(mdev_register_device);
> +
> +/*
> + * mdev_unregister_device : Unregister a parent device
> + * @dev: device structure representing parent device.
> + *
> + * Remove device from list of registered parent devices. Give a chance to free
> + * existing mediated devices for given device.
> + */
> +
> +void mdev_unregister_device(struct device *dev)
> +{
> +	struct parent_device *parent;
> +	bool force_remove = true;
> +
> +	mutex_lock(&parent_list_lock);
> +	parent = __find_parent_device(dev);
> +
> +	if (!parent) {
> +		mutex_unlock(&parent_list_lock);
> +		return;
> +	}
> +	dev_info(dev, "MDEV: Unregistering\n");
> +
> +	/*
> +	 * Remove parent from the list and remove "mdev_supported_types"
> +	 * sysfs files so that no new mediated device could be
> +	 * created for this parent
> +	 */
> +	list_del(&parent->next);
> +	parent_remove_sysfs_files(parent);
> +
> +	mutex_unlock(&parent_list_lock);
> +
> +	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
> +
> +	device_for_each_child(dev, (void *)&force_remove,
> +			      mdev_device_remove_cb);
> +	mdev_put_parent(parent);
> +}
> +EXPORT_SYMBOL(mdev_unregister_device);
> +
> +static void mdev_device_release(struct device *dev)
> +{
> +	struct mdev_device *mdev = to_mdev_device(dev);
> +
> +	dev_dbg(&mdev->dev, "MDEV: destroying\n");
> +	kfree(mdev);
> +}
> +
> +int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
> +{
> +	int ret;
> +	struct mdev_device *mdev;
> +	struct parent_device *parent;
> +	struct mdev_type *type = to_mdev_type(kobj);
> +
> +	parent = mdev_get_parent(type->parent);
> +	if (!parent)
> +		return -EINVAL;
> +
> +	/* Check for duplicate */
> +	mdev = __find_mdev_device(parent, uuid);
> +	if (mdev) {
> +		ret = -EEXIST;
> +		goto create_err;
> +	}

We check here whether the {parent,uuid} already exists, but what
prevents us racing with another create call with the same uuid?  ie.
neither exists at this point.  Will device_register() fail if the
device name already exists?  If so, should we just rely on the error
there and skip this duplicate check?  If not, we need a mutex to avoid
the race.

> +
> +	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
> +	if (!mdev) {
> +		ret = -ENOMEM;
> +		goto create_err;
> +	}
> +
> +	memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
> +	mdev->parent = parent;
> +	kref_init(&mdev->ref);
> +
> +	mdev->dev.parent  = dev;
> +	mdev->dev.bus     = &mdev_bus_type;
> +	mdev->dev.release = mdev_device_release;
> +	dev_set_name(&mdev->dev, "%pUl", uuid.b);
> +
> +	ret = device_register(&mdev->dev);
> +	if (ret) {
> +		put_device(&mdev->dev);
> +		goto create_err;
> +	}
> +
> +	ret = mdev_device_create_ops(kobj, mdev);
> +	if (ret)
> +		goto create_failed;
> +
> +	ret = mdev_create_sysfs_files(&mdev->dev, type);
> +	if (ret) {
> +		mdev_device_remove_ops(mdev, true);
> +		goto create_failed;
> +	}
> +
> +	mdev->type_kobj = kobj;
> +	dev_dbg(&mdev->dev, "MDEV: created\n");
> +
> +	return ret;
> +
> +create_failed:
> +	device_unregister(&mdev->dev);
> +
> +create_err:
> +	mdev_put_parent(parent);
> +	return ret;
> +}
> +
> +int mdev_device_remove(struct device *dev, bool force_remove)
> +{
> +	struct mdev_device *mdev;
> +	struct parent_device *parent;
> +	struct mdev_type *type;
> +	int ret = 0;
> +
> +	if (!dev_is_mdev(dev))
> +		return 0;
> +
> +	mdev = to_mdev_device(dev);
> +	parent = mdev->parent;
> +	type = to_mdev_type(mdev->type_kobj);
> +
> +	ret = mdev_device_remove_ops(mdev, force_remove);
> +	if (ret)
> +		return ret;
> +
> +	mdev_remove_sysfs_files(dev, type);
> +	device_unregister(dev);
> +	mdev_put_parent(parent);
> +	return ret;
> +}
> +
> +static int __init mdev_init(void)
> +{
> +	int ret;
> +
> +	ret = mdev_bus_register();
> +	if (ret) {
> +		pr_err("Failed to register mdev bus\n");
> +		return ret;
> +	}
> +
> +	mdev_bus_compat_class = class_compat_register("mdev_bus");
> +	if (!mdev_bus_compat_class) {
> +		mdev_bus_unregister();
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * Attempt to load known vfio_mdev.  This gives us a working environment
> +	 * without the user needing to explicitly load vfio_mdev driver.
> +	 */
> +	request_module_nowait("vfio_mdev");
> +
> +	return ret;
> +}
> +
> +static void __exit mdev_exit(void)
> +{
> +	class_compat_unregister(mdev_bus_compat_class);
> +	mdev_bus_unregister();
> +}
> +
> +module_init(mdev_init)
> +module_exit(mdev_exit)
> +
> +MODULE_VERSION(DRIVER_VERSION);
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR(DRIVER_AUTHOR);
> +MODULE_DESCRIPTION(DRIVER_DESC);
> diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
> new file mode 100644
> index 000000000000..7768ef87f528
> --- /dev/null
> +++ b/drivers/vfio/mdev/mdev_driver.c
> @@ -0,0 +1,128 @@
> +/*
> + * MDEV driver
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/device.h>
> +#include <linux/iommu.h>
> +#include <linux/mdev.h>
> +
> +#include "mdev_private.h"
> +
> +static int mdev_attach_iommu(struct mdev_device *mdev)
> +{
> +	int ret;
> +	struct iommu_group *group;
> +
> +	group = iommu_group_alloc();
> +	if (IS_ERR(group)) {
> +		dev_err(&mdev->dev, "MDEV: failed to allocate group!\n");
> +		return PTR_ERR(group);
> +	}
> +
> +	ret = iommu_group_add_device(group, &mdev->dev);
> +	if (ret) {
> +		dev_err(&mdev->dev, "MDEV: failed to add dev to group!\n");
> +		goto attach_fail;
> +	}
> +
> +	dev_info(&mdev->dev, "MDEV: group_id = %d\n",
> +				 iommu_group_id(group));
> +attach_fail:
> +	iommu_group_put(group);
> +	return ret;
> +}
> +
> +static void mdev_detach_iommu(struct mdev_device *mdev)
> +{
> +	iommu_group_remove_device(&mdev->dev);
> +	dev_info(&mdev->dev, "MDEV: detaching iommu\n");
> +}
> +
> +static int mdev_probe(struct device *dev)
> +{
> +	struct mdev_driver *drv = to_mdev_driver(dev->driver);
> +	struct mdev_device *mdev = to_mdev_device(dev);
> +	int ret;
> +
> +	ret = mdev_attach_iommu(mdev);
> +	if (ret) {
> +		dev_err(dev, "Failed to attach IOMMU\n");
> +		return ret;
> +	}
> +
> +	if (drv && drv->probe)
> +		ret = drv->probe(dev);
> +
> +	if (ret)
> +		mdev_detach_iommu(mdev);
> +
> +	return ret;
> +}
> +
> +static int mdev_remove(struct device *dev)
> +{
> +	struct mdev_driver *drv = to_mdev_driver(dev->driver);
> +	struct mdev_device *mdev = to_mdev_device(dev);
> +
> +	if (drv && drv->remove)
> +		drv->remove(dev);
> +
> +	mdev_detach_iommu(mdev);
> +
> +	return 0;
> +}
> +
> +struct bus_type mdev_bus_type = {
> +	.name		= "mdev",
> +	.probe		= mdev_probe,
> +	.remove		= mdev_remove,
> +};
> +EXPORT_SYMBOL_GPL(mdev_bus_type);
> +
> +/*
> + * mdev_register_driver - register a new MDEV driver
> + * @drv: the driver to register
> + * @owner: module owner of driver to be registered
> + *
> + * Returns a negative value on error, otherwise 0.
> + */
> +int mdev_register_driver(struct mdev_driver *drv, struct module *owner)
> +{
> +	/* initialize common driver fields */
> +	drv->driver.name = drv->name;
> +	drv->driver.bus = &mdev_bus_type;
> +	drv->driver.owner = owner;
> +
> +	/* register with core */
> +	return driver_register(&drv->driver);
> +}
> +EXPORT_SYMBOL(mdev_register_driver);
> +
> +/*
> + * mdev_unregister_driver - unregister MDEV driver
> + * @drv: the driver to unregister
> + *
> + */
> +void mdev_unregister_driver(struct mdev_driver *drv)
> +{
> +	driver_unregister(&drv->driver);
> +}
> +EXPORT_SYMBOL(mdev_unregister_driver);
> +
> +int mdev_bus_register(void)
> +{
> +	return bus_register(&mdev_bus_type);
> +}
> +
> +void mdev_bus_unregister(void)
> +{
> +	bus_unregister(&mdev_bus_type);
> +}
> diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
> new file mode 100644
> index 000000000000..000c93fcfdbd
> --- /dev/null
> +++ b/drivers/vfio/mdev/mdev_private.h
> @@ -0,0 +1,41 @@
> +/*
> + * Mediated device interal definitions
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef MDEV_PRIVATE_H
> +#define MDEV_PRIVATE_H
> +
> +int  mdev_bus_register(void);
> +void mdev_bus_unregister(void);
> +
> +struct mdev_type {
> +	struct kobject kobj;
> +	struct kobject *devices_kobj;
> +	struct parent_device *parent;
> +	struct list_head next;
> +	struct attribute_group *group;
> +};
> +
> +#define to_mdev_type_attr(_attr)	\
> +	container_of(_attr, struct mdev_type_attribute, attr)
> +#define to_mdev_type(_kobj)		\
> +	container_of(_kobj, struct mdev_type, kobj)
> +
> +int  parent_create_sysfs_files(struct parent_device *parent);
> +void parent_remove_sysfs_files(struct parent_device *parent);
> +
> +int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type);
> +void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
> +
> +int  mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid);
> +int  mdev_device_remove(struct device *dev, bool force_remove);
> +
> +#endif /* MDEV_PRIVATE_H */
> diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
> new file mode 100644
> index 000000000000..426e35cf79d0
> --- /dev/null
> +++ b/drivers/vfio/mdev/mdev_sysfs.c
> @@ -0,0 +1,296 @@
> +/*
> + * File attributes for Mediated devices
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/sysfs.h>
> +#include <linux/ctype.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <linux/uuid.h>
> +#include <linux/mdev.h>
> +
> +#include "mdev_private.h"
> +
> +/* Static functions */
> +
> +static ssize_t mdev_type_attr_show(struct kobject *kobj,
> +				     struct attribute *__attr, char *buf)
> +{
> +	struct mdev_type_attribute *attr = to_mdev_type_attr(__attr);
> +	struct mdev_type *type = to_mdev_type(kobj);
> +	ssize_t ret = -EIO;
> +
> +	if (attr->show)
> +		ret = attr->show(kobj, type->parent->dev, buf);
> +	return ret;
> +}
> +
> +static ssize_t mdev_type_attr_store(struct kobject *kobj,
> +				      struct attribute *__attr,
> +				      const char *buf, size_t count)
> +{
> +	struct mdev_type_attribute *attr = to_mdev_type_attr(__attr);
> +	struct mdev_type *type = to_mdev_type(kobj);
> +	ssize_t ret = -EIO;
> +
> +	if (attr->store)
> +		ret = attr->store(&type->kobj, type->parent->dev, buf, count);
> +	return ret;
> +}
> +
> +static const struct sysfs_ops mdev_type_sysfs_ops = {
> +	.show = mdev_type_attr_show,
> +	.store = mdev_type_attr_store,
> +};
> +
> +static ssize_t create_store(struct kobject *kobj, struct device *dev,
> +			    const char *buf, size_t count)
> +{
> +	char *str;
> +	uuid_le uuid;
> +	int ret;
> +
> +	if (count < UUID_STRING_LEN)
> +		return -EINVAL;


Can't we also test for something unreasonably large?


> +
> +	str = kstrndup(buf, count, GFP_KERNEL);
> +	if (!str)
> +		return -ENOMEM;
> +
> +	ret = uuid_le_to_bin(str, &uuid);

nit, we can kfree(str) here regardless of the return.

> +	if (!ret) {
> +
> +		ret = mdev_device_create(kobj, dev, uuid);
> +		if (ret)
> +			pr_err("mdev_create: Failed to create mdev device\n");

What value does this pr_err add?  It doesn't tell us why it failed and
the user will already know if failed by the return value of their write.

> +		else
> +			ret = count;
> +	}
> +
> +	kfree(str);
> +	return ret;
> +}
> +
> +MDEV_TYPE_ATTR_WO(create);
> +
> +static void mdev_type_release(struct kobject *kobj)
> +{
> +	struct mdev_type *type = to_mdev_type(kobj);
> +
> +	pr_debug("Releasing group %s\n", kobj->name);
> +	kfree(type);
> +}
> +
> +static struct kobj_type mdev_type_ktype = {
> +	.sysfs_ops = &mdev_type_sysfs_ops,
> +	.release = mdev_type_release,
> +};
> +
> +struct mdev_type *add_mdev_supported_type(struct parent_device *parent,
> +					  struct attribute_group *group)
> +{
> +	struct mdev_type *type;
> +	int ret;
> +
> +	if (!group->name) {
> +		pr_err("%s: Type name empty!\n", __func__);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	type = kzalloc(sizeof(*type), GFP_KERNEL);
> +	if (!type)
> +		return ERR_PTR(-ENOMEM);
> +
> +	type->kobj.kset = parent->mdev_types_kset;
> +
> +	ret = kobject_init_and_add(&type->kobj, &mdev_type_ktype, NULL,
> +				   "%s-%s", dev_driver_string(parent->dev),
> +				   group->name);
> +	if (ret) {
> +		kfree(type);
> +		return ERR_PTR(ret);
> +	}
> +
> +	ret = sysfs_create_file(&type->kobj, &mdev_type_attr_create.attr);
> +	if (ret)
> +		goto attr_create_failed;
> +
> +	type->devices_kobj = kobject_create_and_add("devices", &type->kobj);
> +	if (!type->devices_kobj) {
> +		ret = -ENOMEM;
> +		goto attr_devices_failed;
> +	}
> +
> +	ret = sysfs_create_files(&type->kobj,
> +				 (const struct attribute **)group->attrs);
> +	if (ret) {
> +		ret = -ENOMEM;
> +		goto attrs_failed;
> +	}
> +
> +	type->group = group;
> +	type->parent = parent;
> +	return type;
> +
> +attrs_failed:
> +	kobject_put(type->devices_kobj);
> +attr_devices_failed:
> +	sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
> +attr_create_failed:
> +	kobject_del(&type->kobj);
> +	kobject_put(&type->kobj);
> +	return ERR_PTR(ret);
> +}
> +
> +static void remove_mdev_supported_type(struct mdev_type *type)
> +{
> +	sysfs_remove_files(&type->kobj,
> +			   (const struct attribute **)type->group->attrs);
> +	kobject_put(type->devices_kobj);
> +	sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
> +	kobject_del(&type->kobj);
> +	kobject_put(&type->kobj);
> +}
> +
> +static int add_mdev_supported_type_groups(struct parent_device *parent)
> +{
> +	int i;
> +
> +	for (i = 0; parent->ops->supported_type_groups[i]; i++) {
> +		struct mdev_type *type;
> +
> +		type = add_mdev_supported_type(parent,
> +					parent->ops->supported_type_groups[i]);
> +		if (IS_ERR(type)) {
> +			struct mdev_type *ltype, *tmp;
> +
> +			list_for_each_entry_safe(ltype, tmp, &parent->type_list,
> +						  next) {
> +				list_del(&ltype->next);
> +				remove_mdev_supported_type(ltype);
> +			}
> +			return PTR_ERR(type);
> +		}
> +		list_add(&type->next, &parent->type_list);
> +	}
> +	return 0;
> +}
> +
> +/* mdev sysfs Functions */
> +
> +void parent_remove_sysfs_files(struct parent_device *parent)
> +{
> +	struct mdev_type *type, *tmp;
> +
> +	list_for_each_entry_safe(type, tmp, &parent->type_list, next) {
> +		list_del(&type->next);
> +		remove_mdev_supported_type(type);
> +	}
> +
> +	sysfs_remove_groups(&parent->dev->kobj, parent->ops->dev_attr_groups);
> +	kset_unregister(parent->mdev_types_kset);
> +}
> +
> +int parent_create_sysfs_files(struct parent_device *parent)
> +{
> +	int ret;
> +
> +	parent->mdev_types_kset = kset_create_and_add("mdev_supported_types",
> +					       NULL, &parent->dev->kobj);
> +
> +	if (!parent->mdev_types_kset)
> +		return -ENOMEM;
> +
> +	INIT_LIST_HEAD(&parent->type_list);
> +
> +	ret = sysfs_create_groups(&parent->dev->kobj,
> +				  parent->ops->dev_attr_groups);
> +	if (ret)
> +		goto create_err;
> +
> +	ret = add_mdev_supported_type_groups(parent);
> +	if (ret)
> +		sysfs_remove_groups(&parent->dev->kobj,
> +				    parent->ops->dev_attr_groups);
> +	else
> +		return ret;
> +
> +create_err:
> +	kset_unregister(parent->mdev_types_kset);
> +	return ret;
> +}
> +
> +static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
> +			    const char *buf, size_t count)
> +{
> +	unsigned long val;
> +
> +	if (kstrtoul(buf, 0, &val) < 0)
> +		return -EINVAL;
> +
> +	if (val && device_remove_file_self(dev, attr)) {
> +		int ret;
> +
> +		ret = mdev_device_remove(dev, false);
> +		if (ret) {
> +			device_create_file(dev, attr);
> +			return ret;
> +		}
> +	}
> +
> +	return count;
> +}
> +
> +static DEVICE_ATTR_WO(remove);
> +
> +static const struct attribute *mdev_device_attrs[] = {
> +	&dev_attr_remove.attr,
> +	NULL,
> +};
> +
> +int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
> +{
> +	int ret;
> +
> +	ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
> +	if (ret) {
> +		pr_err("Failed to create remove sysfs entry\n");
> +		return ret;
> +	}
> +
> +	ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
> +	if (ret) {
> +		pr_err("Failed to create symlink in types\n");
> +		goto device_link_failed;
> +	}
> +
> +	ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
> +	if (ret) {
> +		pr_err("Failed to create symlink in device directory\n");
> +		goto type_link_failed;
> +	}
> +
> +	return ret;
> +
> +type_link_failed:
> +	sysfs_remove_link(type->devices_kobj, dev_name(dev));
> +device_link_failed:
> +	sysfs_remove_files(&dev->kobj, mdev_device_attrs);
> +	return ret;
> +}
> +
> +void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type)
> +{
> +	sysfs_remove_link(&dev->kobj, "mdev_type");
> +	sysfs_remove_link(type->devices_kobj, dev_name(dev));
> +	sysfs_remove_files(&dev->kobj, mdev_device_attrs);
> +
> +}
> diff --git a/include/linux/mdev.h b/include/linux/mdev.h
> new file mode 100644
> index 000000000000..727209b2a67f
> --- /dev/null
> +++ b/include/linux/mdev.h
> @@ -0,0 +1,177 @@
> +/*
> + * Mediated device definition
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef MDEV_H
> +#define MDEV_H
> +
> +#include <uapi/linux/vfio.h>
> +
> +struct parent_device;
> +
> +/* Mediated device */
> +struct mdev_device {
> +	struct device		dev;
> +	struct parent_device	*parent;
> +	uuid_le			uuid;
> +	void			*driver_data;
> +
> +	/* internal only */
> +	struct kref		ref;
> +	struct list_head	next;
> +	struct kobject		*type_kobj;
> +};
> +
> +
> +/**
> + * struct parent_ops - Structure to be registered for each parent device to
> + * register the device to mdev module.
> + *
> + * @owner:		The module owner.
> + * @dev_attr_groups:	Attributes of the parent device.
> + * @mdev_attr_groups:	Attributes of the mediated device.
> + * @supported_type_groups: Attributes to define supported types. It is mandatory
> + *			to provide supported types.
> + * @create:		Called to allocate basic resources in parent device's
> + *			driver for a particular mediated device. It is
> + *			mandatory to provide create ops.
> + *			@kobj: kobject of type for which 'create' is called.
> + *			@mdev: mdev_device structure on of mediated device
> + *			      that is being created
> + *			Returns integer: success (0) or error (< 0)
> + * @remove:		Called to free resources in parent device's driver for a
> + *			a mediated device. It is mandatory to provide 'remove'
> + *			ops.
> + *			@mdev: mdev_device device structure which is being
> + *			       destroyed
> + *			Returns integer: success (0) or error (< 0)
> + * @open:		Open mediated device.
> + *			@mdev: mediated device.
> + *			Returns integer: success (0) or error (< 0)
> + * @release:		release mediated device
> + *			@mdev: mediated device.
> + * @read:		Read emulation callback
> + *			@mdev: mediated device structure
> + *			@buf: read buffer
> + *			@count: number of bytes to read
> + *			@ppos: address.
> + *			Retuns number on bytes read on success or error.
> + * @write:		Write emulation callback
> + *			@mdev: mediated device structure
> + *			@buf: write buffer
> + *			@count: number of bytes to be written
> + *			@ppos: address.
> + *			Retuns number on bytes written on success or error.
> + * @ioctl:		IOCTL callback
> + *			@mdev: mediated device structure
> + *			@cmd: mediated device structure
> + *			@arg: mediated device structure
> + * @mmap:		mmap callback
> + * Parent device that support mediated device should be registered with mdev
> + * module with parent_ops structure.
> + */
> +
> +struct parent_ops {
> +	struct module   *owner;
> +	const struct attribute_group **dev_attr_groups;
> +	const struct attribute_group **mdev_attr_groups;
> +	struct attribute_group **supported_type_groups;
> +
> +	int     (*create)(struct kobject *kobj, struct mdev_device *mdev);
> +	int     (*remove)(struct mdev_device *mdev);
> +	int     (*open)(struct mdev_device *mdev);
> +	void    (*release)(struct mdev_device *mdev);
> +	ssize_t (*read)(struct mdev_device *mdev, char __user *buf,
> +			size_t count, loff_t *ppos);
> +	ssize_t (*write)(struct mdev_device *mdev, const char __user *buf,
> +			 size_t count, loff_t *ppos);
> +	ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd,
> +			 unsigned long arg);
> +	int	(*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma);
> +};
> +
> +/* Parent Device */
> +struct parent_device {
> +	struct device		*dev;
> +	const struct parent_ops	*ops;
> +
> +	/* internal */
> +	struct kref		ref;
> +	struct list_head	next;
> +	struct kset *mdev_types_kset;
> +	struct list_head	type_list;
> +};
> +
> +/* interface for exporting mdev supported type attributes */
> +struct mdev_type_attribute {
> +	struct attribute attr;
> +	ssize_t (*show)(struct kobject *kobj, struct device *dev, char *buf);
> +	ssize_t (*store)(struct kobject *kobj, struct device *dev,
> +			 const char *buf, size_t count);
> +};
> +
> +#define MDEV_TYPE_ATTR(_name, _mode, _show, _store)		\
> +struct mdev_type_attribute mdev_type_attr_##_name =		\
> +	__ATTR(_name, _mode, _show, _store)
> +#define MDEV_TYPE_ATTR_RW(_name) \
> +	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RW(_name)
> +#define MDEV_TYPE_ATTR_RO(_name) \
> +	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name)
> +#define MDEV_TYPE_ATTR_WO(_name) \
> +	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name)
> +
> +/**
> + * struct mdev_driver - Mediated device driver
> + * @name: driver name
> + * @probe: called when new device created
> + * @remove: called when device removed
> + * @driver: device driver structure
> + *
> + **/
> +struct mdev_driver {
> +	const char *name;
> +	int  (*probe)(struct device *dev);
> +	void (*remove)(struct device *dev);
> +	struct device_driver driver;
> +};
> +
> +static inline struct mdev_driver *to_mdev_driver(struct device_driver *drv)
> +{
> +	return drv ? container_of(drv, struct mdev_driver, driver) : NULL;
> +}
> +
> +static inline struct mdev_device *to_mdev_device(struct device *dev)
> +{
> +	return dev ? container_of(dev, struct mdev_device, dev) : NULL;


Do we really need this NULL dev/drv behavior?  I don't see that any of
the callers can pass NULL into these.  The PCI equivalents don't
support this behavior and it doesn't seem they need to.  Thanks,

Alex


> +}
> +
> +static inline void *mdev_get_drvdata(struct mdev_device *mdev)
> +{
> +	return mdev->driver_data;
> +}
> +
> +static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data)
> +{
> +	mdev->driver_data = data;
> +}
> +
> +extern struct bus_type mdev_bus_type;
> +
> +#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type)
> +
> +extern int  mdev_register_device(struct device *dev,
> +				 const struct parent_ops *ops);
> +extern void mdev_unregister_device(struct device *dev);
> +
> +extern int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
> +extern void mdev_unregister_driver(struct mdev_driver *drv);
> +
> +#endif /* MDEV_H */

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kirti Wankhede Oct. 19, 2016, 7:16 p.m. UTC | #2
On 10/19/2016 4:46 AM, Alex Williamson wrote:
> On Tue, 18 Oct 2016 02:52:01 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
...
>> +static struct mdev_device *__find_mdev_device(struct parent_device *parent,
>> +					      uuid_le uuid)
>> +{
>> +	struct device *dev;
>> +
>> +	dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
>> +	if (!dev)
>> +		return NULL;
>> +
>> +	put_device(dev);
>> +
>> +	return to_mdev_device(dev);
>> +}
> 
> This function is only used by mdev_device_create() for the purpose of
> checking whether a given uuid for a parent already exists, so the
> returned device is not actually used.  However, at the point where
> we're using to_mdev_device() here, we don't actually hold a reference to
> the device, so that function call and any possible use of the returned
> pointer by the callee is invalid.  I would either turn this into a
> "get" function where the callee has a device reference and needs to do
> a "put" on it or change this to a "exists" test where true/false is
> returned and the function cannot be later mis-used to do a device
> lookup where the reference isn't actually valid.
> 

I'll change it to return 0 if not found and -EEXIST if found.


>> +int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
>> +{
>> +	int ret;
>> +	struct mdev_device *mdev;
>> +	struct parent_device *parent;
>> +	struct mdev_type *type = to_mdev_type(kobj);
>> +
>> +	parent = mdev_get_parent(type->parent);
>> +	if (!parent)
>> +		return -EINVAL;
>> +
>> +	/* Check for duplicate */
>> +	mdev = __find_mdev_device(parent, uuid);
>> +	if (mdev) {
>> +		ret = -EEXIST;
>> +		goto create_err;
>> +	}
> 
> We check here whether the {parent,uuid} already exists, but what
> prevents us racing with another create call with the same uuid?  ie.
> neither exists at this point.  Will device_register() fail if the
> device name already exists?  If so, should we just rely on the error
> there and skip this duplicate check?  If not, we need a mutex to avoid
> the race.
>

Yes, device_register() fails if device exists already with below
warning. Is it ok to dump such warning? I think, this should be fine,
right? then we can remove duplicate check.

If we want to avoid such warning, we should have duplication check.

[  610.847958] ------------[ cut here ]------------
[  610.855377] WARNING: CPU: 15 PID: 19839 at fs/sysfs/dir.c:31
sysfs_warn_dup+0x64/0x80
[  610.865798] sysfs: cannot create duplicate filename
'/devices/pci0000:80/0000:80:02.0/0000:83:00.0/0000:84:08.0/0000:85:00.0/83b8f4f2-509f-382f-3c1e-e6bfe0fa1234'
[  610.885101] Modules linked in:[  610.888039]  nvidia(POE)
vfio_iommu_type1 vfio_mdev mdev vfio nfsv4 dns_resolver nfs fscache
sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp
kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel
ghash_clmulni_intel aesni_intel glue_helper lrw gf128mul ablk_helper
cryptd nfsd auth_rpcgss nfs_acl lockd mei_me grace iTCO_wdt
iTCO_vendor_support mei ipmi_si pcspkr ioatdma i2c_i801 lpc_ich shpchp
i2c_smbus mfd_core ipmi_msghandler acpi_pad uinput sunrpc xfs libcrc32c
sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt
fb_sys_fops ttm drm igb ahci libahci ptp libata pps_core dca
i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod [last
unloaded: mdev]
[  610.963835] CPU: 15 PID: 19839 Comm: bash Tainted: P           OE
4.8.0-next-20161013+ #0
[  610.973779] Hardware name: Supermicro
SYS-2027GR-5204A-NC024/X9DRG-HF, BIOS 1.0c 02/28/2013
[  610.983769]  ffffc90009323ae0 ffffffff813568bf ffffc90009323b30
0000000000000000
[  610.992867]  ffffc90009323b20 ffffffff81085511 0000001f00001000
ffff8808839ef000
[  611.001954]  ffff88108b30f900 ffff88109ae368e8 ffff88109ae580b0
ffff881099cc0818
[  611.011055] Call Trace:
[  611.015087]  [<ffffffff813568bf>] dump_stack+0x63/0x84
[  611.021784]  [<ffffffff81085511>] __warn+0xd1/0xf0
[  611.028115]  [<ffffffff8108558f>] warn_slowpath_fmt+0x5f/0x80
[  611.035379]  [<ffffffff812a4e80>] ? kernfs_path_from_node+0x50/0x60
[  611.043148]  [<ffffffff812a86c4>] sysfs_warn_dup+0x64/0x80
[  611.050109]  [<ffffffff812a87ae>] sysfs_create_dir_ns+0x7e/0x90
[  611.057481]  [<ffffffff81359891>] kobject_add_internal+0xc1/0x340
[  611.065018]  [<ffffffff81359d45>] kobject_add+0x75/0xd0
[  611.071635]  [<ffffffff81483829>] device_add+0x119/0x610
[  611.078314]  [<ffffffff81483d3a>] device_register+0x1a/0x20
[  611.085261]  [<ffffffffa03c748d>] mdev_device_create+0xdd/0x200 [mdev]
[  611.093143]  [<ffffffffa03c7768>] create_store+0xa8/0xe0 [mdev]
[  611.100385]  [<ffffffffa03c76ab>] mdev_type_attr_store+0x1b/0x30 [mdev]
[  611.108309]  [<ffffffff812a7d8a>] sysfs_kf_write+0x3a/0x50
[  611.115096]  [<ffffffff812a78bb>] kernfs_fop_write+0x10b/0x190
[  611.122231]  [<ffffffff81224e97>] __vfs_write+0x37/0x140
[  611.128817]  [<ffffffff811cea84>] ? handle_mm_fault+0x724/0xd80
[  611.135976]  [<ffffffff81225da2>] vfs_write+0xb2/0x1b0
[  611.142354]  [<ffffffff81003510>] ? syscall_trace_enter+0x1d0/0x2b0
[  611.149836]  [<ffffffff812271f5>] SyS_write+0x55/0xc0
[  611.156065]  [<ffffffff81003a47>] do_syscall_64+0x67/0x180
[  611.162734]  [<ffffffff816d41eb>] entry_SYSCALL64_slow_path+0x25/0x25
[  611.170345] ---[ end trace b05a73599da2ba3f ]---
[  611.175940] ------------[ cut here ]------------



>> +static ssize_t create_store(struct kobject *kobj, struct device *dev,
>> +			    const char *buf, size_t count)
>> +{
>> +	char *str;
>> +	uuid_le uuid;
>> +	int ret;
>> +
>> +	if (count < UUID_STRING_LEN)
>> +		return -EINVAL;
> 
> 
> Can't we also test for something unreasonably large?
> 

Ok. I'll add that check.

> 
>> +
>> +	str = kstrndup(buf, count, GFP_KERNEL);
>> +	if (!str)
>> +		return -ENOMEM;
>> +
>> +	ret = uuid_le_to_bin(str, &uuid);
> 
> nit, we can kfree(str) here regardless of the return.
> 
>> +	if (!ret) {
>> +
>> +		ret = mdev_device_create(kobj, dev, uuid);
>> +		if (ret)
>> +			pr_err("mdev_create: Failed to create mdev device\n");
> 
> What value does this pr_err add?  It doesn't tell us why it failed and
> the user will already know if failed by the return value of their write.
> 

Ok, will remove it.

>> +		else
>> +			ret = count;
>> +	}
>> +
>> +	kfree(str);
>> +	return ret;
>> +}

...

>> +static inline struct mdev_driver *to_mdev_driver(struct device_driver *drv)
>> +{
>> +	return drv ? container_of(drv, struct mdev_driver, driver) : NULL;
>> +}
>> +
>> +static inline struct mdev_device *to_mdev_device(struct device *dev)
>> +{
>> +	return dev ? container_of(dev, struct mdev_device, dev) : NULL;
> 
> 
> Do we really need this NULL dev/drv behavior?  I don't see that any of
> the callers can pass NULL into these.  The PCI equivalents don't
> support this behavior and it doesn't seem they need to.  Thanks,
>

Ok, I'll update that.

Kirti
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Williamson Oct. 19, 2016, 10:20 p.m. UTC | #3
On Thu, 20 Oct 2016 00:46:48 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> On 10/19/2016 4:46 AM, Alex Williamson wrote:
> > On Tue, 18 Oct 2016 02:52:01 +0530
> > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> >   
> ...
> >> +static struct mdev_device *__find_mdev_device(struct parent_device *parent,
> >> +					      uuid_le uuid)
> >> +{
> >> +	struct device *dev;
> >> +
> >> +	dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
> >> +	if (!dev)
> >> +		return NULL;
> >> +
> >> +	put_device(dev);
> >> +
> >> +	return to_mdev_device(dev);
> >> +}  
> > 
> > This function is only used by mdev_device_create() for the purpose of
> > checking whether a given uuid for a parent already exists, so the
> > returned device is not actually used.  However, at the point where
> > we're using to_mdev_device() here, we don't actually hold a reference to
> > the device, so that function call and any possible use of the returned
> > pointer by the callee is invalid.  I would either turn this into a
> > "get" function where the callee has a device reference and needs to do
> > a "put" on it or change this to a "exists" test where true/false is
> > returned and the function cannot be later mis-used to do a device
> > lookup where the reference isn't actually valid.
> >   
> 
> I'll change it to return 0 if not found and -EEXIST if found.
> 
> 
> >> +int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
> >> +{
> >> +	int ret;
> >> +	struct mdev_device *mdev;
> >> +	struct parent_device *parent;
> >> +	struct mdev_type *type = to_mdev_type(kobj);
> >> +
> >> +	parent = mdev_get_parent(type->parent);
> >> +	if (!parent)
> >> +		return -EINVAL;
> >> +
> >> +	/* Check for duplicate */
> >> +	mdev = __find_mdev_device(parent, uuid);
> >> +	if (mdev) {
> >> +		ret = -EEXIST;
> >> +		goto create_err;
> >> +	}  
> > 
> > We check here whether the {parent,uuid} already exists, but what
> > prevents us racing with another create call with the same uuid?  ie.
> > neither exists at this point.  Will device_register() fail if the
> > device name already exists?  If so, should we just rely on the error
> > there and skip this duplicate check?  If not, we need a mutex to avoid
> > the race.
> >  
> 
> Yes, device_register() fails if device exists already with below
> warning. Is it ok to dump such warning? I think, this should be fine,
> right? then we can remove duplicate check.
> 
> If we want to avoid such warning, we should have duplication check.

We should avoid such warnings, bugs will get filed otherwise.  Thanks
for checking.  Thanks,

Alex
 
> [  610.847958] ------------[ cut here ]------------
> [  610.855377] WARNING: CPU: 15 PID: 19839 at fs/sysfs/dir.c:31
> sysfs_warn_dup+0x64/0x80
> [  610.865798] sysfs: cannot create duplicate filename
> '/devices/pci0000:80/0000:80:02.0/0000:83:00.0/0000:84:08.0/0000:85:00.0/83b8f4f2-509f-382f-3c1e-e6bfe0fa1234'
> [  610.885101] Modules linked in:[  610.888039]  nvidia(POE)
> vfio_iommu_type1 vfio_mdev mdev vfio nfsv4 dns_resolver nfs fscache
> sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp
> kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel
> ghash_clmulni_intel aesni_intel glue_helper lrw gf128mul ablk_helper
> cryptd nfsd auth_rpcgss nfs_acl lockd mei_me grace iTCO_wdt
> iTCO_vendor_support mei ipmi_si pcspkr ioatdma i2c_i801 lpc_ich shpchp
> i2c_smbus mfd_core ipmi_msghandler acpi_pad uinput sunrpc xfs libcrc32c
> sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt
> fb_sys_fops ttm drm igb ahci libahci ptp libata pps_core dca
> i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod [last
> unloaded: mdev]
> [  610.963835] CPU: 15 PID: 19839 Comm: bash Tainted: P           OE
> 4.8.0-next-20161013+ #0
> [  610.973779] Hardware name: Supermicro
> SYS-2027GR-5204A-NC024/X9DRG-HF, BIOS 1.0c 02/28/2013
> [  610.983769]  ffffc90009323ae0 ffffffff813568bf ffffc90009323b30
> 0000000000000000
> [  610.992867]  ffffc90009323b20 ffffffff81085511 0000001f00001000
> ffff8808839ef000
> [  611.001954]  ffff88108b30f900 ffff88109ae368e8 ffff88109ae580b0
> ffff881099cc0818
> [  611.011055] Call Trace:
> [  611.015087]  [<ffffffff813568bf>] dump_stack+0x63/0x84
> [  611.021784]  [<ffffffff81085511>] __warn+0xd1/0xf0
> [  611.028115]  [<ffffffff8108558f>] warn_slowpath_fmt+0x5f/0x80
> [  611.035379]  [<ffffffff812a4e80>] ? kernfs_path_from_node+0x50/0x60
> [  611.043148]  [<ffffffff812a86c4>] sysfs_warn_dup+0x64/0x80
> [  611.050109]  [<ffffffff812a87ae>] sysfs_create_dir_ns+0x7e/0x90
> [  611.057481]  [<ffffffff81359891>] kobject_add_internal+0xc1/0x340
> [  611.065018]  [<ffffffff81359d45>] kobject_add+0x75/0xd0
> [  611.071635]  [<ffffffff81483829>] device_add+0x119/0x610
> [  611.078314]  [<ffffffff81483d3a>] device_register+0x1a/0x20
> [  611.085261]  [<ffffffffa03c748d>] mdev_device_create+0xdd/0x200 [mdev]
> [  611.093143]  [<ffffffffa03c7768>] create_store+0xa8/0xe0 [mdev]
> [  611.100385]  [<ffffffffa03c76ab>] mdev_type_attr_store+0x1b/0x30 [mdev]
> [  611.108309]  [<ffffffff812a7d8a>] sysfs_kf_write+0x3a/0x50
> [  611.115096]  [<ffffffff812a78bb>] kernfs_fop_write+0x10b/0x190
> [  611.122231]  [<ffffffff81224e97>] __vfs_write+0x37/0x140
> [  611.128817]  [<ffffffff811cea84>] ? handle_mm_fault+0x724/0xd80
> [  611.135976]  [<ffffffff81225da2>] vfs_write+0xb2/0x1b0
> [  611.142354]  [<ffffffff81003510>] ? syscall_trace_enter+0x1d0/0x2b0
> [  611.149836]  [<ffffffff812271f5>] SyS_write+0x55/0xc0
> [  611.156065]  [<ffffffff81003a47>] do_syscall_64+0x67/0x180
> [  611.162734]  [<ffffffff816d41eb>] entry_SYSCALL64_slow_path+0x25/0x25
> [  611.170345] ---[ end trace b05a73599da2ba3f ]---
> [  611.175940] ------------[ cut here ]------------
> 
> 
> 
> >> +static ssize_t create_store(struct kobject *kobj, struct device *dev,
> >> +			    const char *buf, size_t count)
> >> +{
> >> +	char *str;
> >> +	uuid_le uuid;
> >> +	int ret;
> >> +
> >> +	if (count < UUID_STRING_LEN)
> >> +		return -EINVAL;  
> > 
> > 
> > Can't we also test for something unreasonably large?
> >   
> 
> Ok. I'll add that check.
> 
> >   
> >> +
> >> +	str = kstrndup(buf, count, GFP_KERNEL);
> >> +	if (!str)
> >> +		return -ENOMEM;
> >> +
> >> +	ret = uuid_le_to_bin(str, &uuid);  
> > 
> > nit, we can kfree(str) here regardless of the return.
> >   
> >> +	if (!ret) {
> >> +
> >> +		ret = mdev_device_create(kobj, dev, uuid);
> >> +		if (ret)
> >> +			pr_err("mdev_create: Failed to create mdev device\n");  
> > 
> > What value does this pr_err add?  It doesn't tell us why it failed and
> > the user will already know if failed by the return value of their write.
> >   
> 
> Ok, will remove it.
> 
> >> +		else
> >> +			ret = count;
> >> +	}
> >> +
> >> +	kfree(str);
> >> +	return ret;
> >> +}  
> 
> ...
> 
> >> +static inline struct mdev_driver *to_mdev_driver(struct device_driver *drv)
> >> +{
> >> +	return drv ? container_of(drv, struct mdev_driver, driver) : NULL;
> >> +}
> >> +
> >> +static inline struct mdev_device *to_mdev_device(struct device *dev)
> >> +{
> >> +	return dev ? container_of(dev, struct mdev_device, dev) : NULL;  
> > 
> > 
> > Do we really need this NULL dev/drv behavior?  I don't see that any of
> > the callers can pass NULL into these.  The PCI equivalents don't
> > support this behavior and it doesn't seem they need to.  Thanks,
> >  
> 
> Ok, I'll update that.
> 
> Kirti

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jike Song Oct. 20, 2016, 7:23 a.m. UTC | #4
On 10/18/2016 05:22 AM, Kirti Wankhede wrote:
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> new file mode 100644
> index 000000000000..7db5ec164aeb
> --- /dev/null
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -0,0 +1,372 @@
> +/*
> + * Mediated device Core Driver
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <linux/uuid.h>
> +#include <linux/sysfs.h>
> +#include <linux/mdev.h>
> +
> +#include "mdev_private.h"
> +
> +#define DRIVER_VERSION		"0.1"
> +#define DRIVER_AUTHOR		"NVIDIA Corporation"
> +#define DRIVER_DESC		"Mediated device Core Driver"
> +
> +static LIST_HEAD(parent_list);
> +static DEFINE_MUTEX(parent_list_lock);
> +static struct class_compat *mdev_bus_compat_class;
> +

> +
> +/*
> + * mdev_register_device : Register a device
> + * @dev: device structure representing parent device.
> + * @ops: Parent device operation structure to be registered.
> + *
> + * Add device to list of registered parent devices.
> + * Returns a negative value on error, otherwise 0.
> + */
> +int mdev_register_device(struct device *dev, const struct parent_ops *ops)
> +{
> +	int ret = 0;
> +	struct parent_device *parent;
> +
> +	/* check for mandatory ops */
> +	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
> +		return -EINVAL;
> +
> +	dev = get_device(dev);
> +	if (!dev)
> +		return -EINVAL;
> +
> +	mutex_lock(&parent_list_lock);
> +
> +	/* Check for duplicate */
> +	parent = __find_parent_device(dev);
> +	if (parent) {
> +		ret = -EEXIST;
> +		goto add_dev_err;
> +	}
> +
> +	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
> +	if (!parent) {
> +		ret = -ENOMEM;
> +		goto add_dev_err;
> +	}
> +
> +	kref_init(&parent->ref);
> +
> +	parent->dev = dev;
> +	parent->ops = ops;
> +
> +	ret = parent_create_sysfs_files(parent);
> +	if (ret) {
> +		mutex_unlock(&parent_list_lock);
> +		mdev_put_parent(parent);
> +		return ret;
> +	}
> +
> +	ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
> +	if (ret)
> +		dev_warn(dev, "Failed to create compatibility class link\n");
> +
> +	list_add(&parent->next, &parent_list);
> +	mutex_unlock(&parent_list_lock);
> +
> +	dev_info(dev, "MDEV: Registered\n");
> +	return 0;
> +
> +add_dev_err:
> +	mutex_unlock(&parent_list_lock);
> +	put_device(dev);
> +	return ret;
> +}
> +EXPORT_SYMBOL(mdev_register_device);

> +static int __init mdev_init(void)
> +{
> +	int ret;
> +
> +	ret = mdev_bus_register();
> +	if (ret) {
> +		pr_err("Failed to register mdev bus\n");
> +		return ret;
> +	}
> +
> +	mdev_bus_compat_class = class_compat_register("mdev_bus");
> +	if (!mdev_bus_compat_class) {
> +		mdev_bus_unregister();
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * Attempt to load known vfio_mdev.  This gives us a working environment
> +	 * without the user needing to explicitly load vfio_mdev driver.
> +	 */
> +	request_module_nowait("vfio_mdev");
> +
> +	return ret;
> +}
> +
> +static void __exit mdev_exit(void)
> +{
> +	class_compat_unregister(mdev_bus_compat_class);
> +	mdev_bus_unregister();
> +}
> +
> +module_init(mdev_init)
> +module_exit(mdev_exit)

Hi Kirti,

There is a possible issue: mdev_bus_register is called from mdev_init,
a module_init, equal to device_initcall if builtin to vmlinux; however,
the vendor driver, say i915.ko for intel case, have to call
mdev_register_device from its module_init: at that time, mdev_init
is still not called.

Not sure if this issue exists with nvidia.ko. Though in most cases we
are expecting users select mdev as a standalone module, we still won't
break builtin case.


Hi Alex, do you have any suggestion here?


--
Thanks,
Jike
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Williamson Oct. 20, 2016, 5:12 p.m. UTC | #5
On Thu, 20 Oct 2016 15:23:53 +0800
Jike Song <jike.song@intel.com> wrote:

> On 10/18/2016 05:22 AM, Kirti Wankhede wrote:
> > diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> > new file mode 100644
> > index 000000000000..7db5ec164aeb
> > --- /dev/null
> > +++ b/drivers/vfio/mdev/mdev_core.c
> > @@ -0,0 +1,372 @@
> > +/*
> > + * Mediated device Core Driver
> > + *
> > + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> > + *     Author: Neo Jia <cjia@nvidia.com>
> > + *	       Kirti Wankhede <kwankhede@nvidia.com>
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <linux/device.h>
> > +#include <linux/slab.h>
> > +#include <linux/uuid.h>
> > +#include <linux/sysfs.h>
> > +#include <linux/mdev.h>
> > +
> > +#include "mdev_private.h"
> > +
> > +#define DRIVER_VERSION		"0.1"
> > +#define DRIVER_AUTHOR		"NVIDIA Corporation"
> > +#define DRIVER_DESC		"Mediated device Core Driver"
> > +
> > +static LIST_HEAD(parent_list);
> > +static DEFINE_MUTEX(parent_list_lock);
> > +static struct class_compat *mdev_bus_compat_class;
> > +  
> 
> > +
> > +/*
> > + * mdev_register_device : Register a device
> > + * @dev: device structure representing parent device.
> > + * @ops: Parent device operation structure to be registered.
> > + *
> > + * Add device to list of registered parent devices.
> > + * Returns a negative value on error, otherwise 0.
> > + */
> > +int mdev_register_device(struct device *dev, const struct parent_ops *ops)
> > +{
> > +	int ret = 0;
> > +	struct parent_device *parent;
> > +
> > +	/* check for mandatory ops */
> > +	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
> > +		return -EINVAL;
> > +
> > +	dev = get_device(dev);
> > +	if (!dev)
> > +		return -EINVAL;
> > +
> > +	mutex_lock(&parent_list_lock);
> > +
> > +	/* Check for duplicate */
> > +	parent = __find_parent_device(dev);
> > +	if (parent) {
> > +		ret = -EEXIST;
> > +		goto add_dev_err;
> > +	}
> > +
> > +	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
> > +	if (!parent) {
> > +		ret = -ENOMEM;
> > +		goto add_dev_err;
> > +	}
> > +
> > +	kref_init(&parent->ref);
> > +
> > +	parent->dev = dev;
> > +	parent->ops = ops;
> > +
> > +	ret = parent_create_sysfs_files(parent);
> > +	if (ret) {
> > +		mutex_unlock(&parent_list_lock);
> > +		mdev_put_parent(parent);
> > +		return ret;
> > +	}
> > +
> > +	ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
> > +	if (ret)
> > +		dev_warn(dev, "Failed to create compatibility class link\n");
> > +
> > +	list_add(&parent->next, &parent_list);
> > +	mutex_unlock(&parent_list_lock);
> > +
> > +	dev_info(dev, "MDEV: Registered\n");
> > +	return 0;
> > +
> > +add_dev_err:
> > +	mutex_unlock(&parent_list_lock);
> > +	put_device(dev);
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL(mdev_register_device);  
> 
> > +static int __init mdev_init(void)
> > +{
> > +	int ret;
> > +
> > +	ret = mdev_bus_register();
> > +	if (ret) {
> > +		pr_err("Failed to register mdev bus\n");
> > +		return ret;
> > +	}
> > +
> > +	mdev_bus_compat_class = class_compat_register("mdev_bus");
> > +	if (!mdev_bus_compat_class) {
> > +		mdev_bus_unregister();
> > +		return -ENOMEM;
> > +	}
> > +
> > +	/*
> > +	 * Attempt to load known vfio_mdev.  This gives us a working environment
> > +	 * without the user needing to explicitly load vfio_mdev driver.
> > +	 */
> > +	request_module_nowait("vfio_mdev");
> > +
> > +	return ret;
> > +}
> > +
> > +static void __exit mdev_exit(void)
> > +{
> > +	class_compat_unregister(mdev_bus_compat_class);
> > +	mdev_bus_unregister();
> > +}
> > +
> > +module_init(mdev_init)
> > +module_exit(mdev_exit)  
> 
> Hi Kirti,
> 
> There is a possible issue: mdev_bus_register is called from mdev_init,
> a module_init, equal to device_initcall if builtin to vmlinux; however,
> the vendor driver, say i915.ko for intel case, have to call
> mdev_register_device from its module_init: at that time, mdev_init
> is still not called.
> 
> Not sure if this issue exists with nvidia.ko. Though in most cases we
> are expecting users select mdev as a standalone module, we still won't
> break builtin case.
> 
> 
> Hi Alex, do you have any suggestion here?

To fully solve the problem of built-in drivers making use of the mdev
infrastructure we'd need to make mdev itself builtin and possibly a
subsystem that is initialized prior to device drivers.  Is that really
necessary?  Even though i915.ko is often loaded as part of an
initramfs, most systems still build it as a module.  I would expect
that standard module dependencies will pull in the necessary mdev and
vfio modules to make this work correctly.  I can't say that I'm
prepared to make mdev be a subsystem as would be necessary for builtin
drivers to make use of.  Perhaps if such a driver exists it could
somehow do late binding with mdev.  i915 should certainly be tested as
a builtin driver to make sure it doesn't fail with mdev support added.
The kvm-vfio device (virt/kvm/vfio.c) makes use of symbol tricks to
avoid hard dependencies between kvm and vfio, perhaps when builtin to
the kernel, i915 could use something like that.  Thanks,

Alex
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jike Song Oct. 21, 2016, 2:41 a.m. UTC | #6
On 10/21/2016 01:12 AM, Alex Williamson wrote:
> On Thu, 20 Oct 2016 15:23:53 +0800
> Jike Song <jike.song@intel.com> wrote:
> 
>> On 10/18/2016 05:22 AM, Kirti Wankhede wrote:
>>> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
>>> new file mode 100644
>>> index 000000000000..7db5ec164aeb
>>> --- /dev/null
>>> +++ b/drivers/vfio/mdev/mdev_core.c
>>> @@ -0,0 +1,372 @@
>>> +/*
>>> + * Mediated device Core Driver
>>> + *
>>> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
>>> + *     Author: Neo Jia <cjia@nvidia.com>
>>> + *	       Kirti Wankhede <kwankhede@nvidia.com>
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License version 2 as
>>> + * published by the Free Software Foundation.
>>> + */
>>> +
>>> +#include <linux/module.h>
>>> +#include <linux/device.h>
>>> +#include <linux/slab.h>
>>> +#include <linux/uuid.h>
>>> +#include <linux/sysfs.h>
>>> +#include <linux/mdev.h>
>>> +
>>> +#include "mdev_private.h"
>>> +
>>> +#define DRIVER_VERSION		"0.1"
>>> +#define DRIVER_AUTHOR		"NVIDIA Corporation"
>>> +#define DRIVER_DESC		"Mediated device Core Driver"
>>> +
>>> +static LIST_HEAD(parent_list);
>>> +static DEFINE_MUTEX(parent_list_lock);
>>> +static struct class_compat *mdev_bus_compat_class;
>>> +  
>>
>>> +
>>> +/*
>>> + * mdev_register_device : Register a device
>>> + * @dev: device structure representing parent device.
>>> + * @ops: Parent device operation structure to be registered.
>>> + *
>>> + * Add device to list of registered parent devices.
>>> + * Returns a negative value on error, otherwise 0.
>>> + */
>>> +int mdev_register_device(struct device *dev, const struct parent_ops *ops)
>>> +{
>>> +	int ret = 0;
>>> +	struct parent_device *parent;
>>> +
>>> +	/* check for mandatory ops */
>>> +	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
>>> +		return -EINVAL;
>>> +
>>> +	dev = get_device(dev);
>>> +	if (!dev)
>>> +		return -EINVAL;
>>> +
>>> +	mutex_lock(&parent_list_lock);
>>> +
>>> +	/* Check for duplicate */
>>> +	parent = __find_parent_device(dev);
>>> +	if (parent) {
>>> +		ret = -EEXIST;
>>> +		goto add_dev_err;
>>> +	}
>>> +
>>> +	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
>>> +	if (!parent) {
>>> +		ret = -ENOMEM;
>>> +		goto add_dev_err;
>>> +	}
>>> +
>>> +	kref_init(&parent->ref);
>>> +
>>> +	parent->dev = dev;
>>> +	parent->ops = ops;
>>> +
>>> +	ret = parent_create_sysfs_files(parent);
>>> +	if (ret) {
>>> +		mutex_unlock(&parent_list_lock);
>>> +		mdev_put_parent(parent);
>>> +		return ret;
>>> +	}
>>> +
>>> +	ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
>>> +	if (ret)
>>> +		dev_warn(dev, "Failed to create compatibility class link\n");
>>> +
>>> +	list_add(&parent->next, &parent_list);
>>> +	mutex_unlock(&parent_list_lock);
>>> +
>>> +	dev_info(dev, "MDEV: Registered\n");
>>> +	return 0;
>>> +
>>> +add_dev_err:
>>> +	mutex_unlock(&parent_list_lock);
>>> +	put_device(dev);
>>> +	return ret;
>>> +}
>>> +EXPORT_SYMBOL(mdev_register_device);  
>>
>>> +static int __init mdev_init(void)
>>> +{
>>> +	int ret;
>>> +
>>> +	ret = mdev_bus_register();
>>> +	if (ret) {
>>> +		pr_err("Failed to register mdev bus\n");
>>> +		return ret;
>>> +	}
>>> +
>>> +	mdev_bus_compat_class = class_compat_register("mdev_bus");
>>> +	if (!mdev_bus_compat_class) {
>>> +		mdev_bus_unregister();
>>> +		return -ENOMEM;
>>> +	}
>>> +
>>> +	/*
>>> +	 * Attempt to load known vfio_mdev.  This gives us a working environment
>>> +	 * without the user needing to explicitly load vfio_mdev driver.
>>> +	 */
>>> +	request_module_nowait("vfio_mdev");
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +static void __exit mdev_exit(void)
>>> +{
>>> +	class_compat_unregister(mdev_bus_compat_class);
>>> +	mdev_bus_unregister();
>>> +}
>>> +
>>> +module_init(mdev_init)
>>> +module_exit(mdev_exit)  
>>
>> Hi Kirti,
>>
>> There is a possible issue: mdev_bus_register is called from mdev_init,
>> a module_init, equal to device_initcall if builtin to vmlinux; however,
>> the vendor driver, say i915.ko for intel case, have to call
>> mdev_register_device from its module_init: at that time, mdev_init
>> is still not called.
>>
>> Not sure if this issue exists with nvidia.ko. Though in most cases we
>> are expecting users select mdev as a standalone module, we still won't
>> break builtin case.
>>
>>
>> Hi Alex, do you have any suggestion here?
> 
> To fully solve the problem of built-in drivers making use of the mdev
> infrastructure we'd need to make mdev itself builtin and possibly a
> subsystem that is initialized prior to device drivers.  Is that really
> necessary?  Even though i915.ko is often loaded as part of an
> initramfs, most systems still build it as a module.  I would expect
> that standard module dependencies will pull in the necessary mdev and
> vfio modules to make this work correctly.  I can't say that I'm
> prepared to make mdev be a subsystem as would be necessary for builtin
> drivers to make use of.  Perhaps if such a driver exists it could
> somehow do late binding with mdev.  i915 should certainly be tested as
> a builtin driver to make sure it doesn't fail with mdev support added.
> The kvm-vfio device (virt/kvm/vfio.c) makes use of symbol tricks to
> avoid hard dependencies between kvm and vfio, perhaps when builtin to
> the kernel, i915 could use something like that.  Thanks,

Fair enough, I'll use symbol_get to avoid the problem. Thanks!

--
Thanks,
Jike

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tian, Kevin Oct. 26, 2016, 6:52 a.m. UTC | #7
> From: Kirti Wankhede [mailto:kwankhede@nvidia.com]
> Sent: Tuesday, October 18, 2016 5:22 AM
> 
> Design for Mediated Device Driver:
> Main purpose of this driver is to provide a common interface for mediated
> device management that can be used by different drivers of different
> devices.
> 
> This module provides a generic interface to create the device, add it to
> mediated bus, add device to IOMMU group and then add it to vfio group.
> 
> Below is the high Level block diagram, with Nvidia, Intel and IBM devices
> as example, since these are the devices which are going to actively use
> this module as of now.
> 
>  +---------------+
>  |               |
>  | +-----------+ |  mdev_register_driver() +--------------+
>  | |           | +<------------------------+ __init()     |
>  | |  mdev     | |                         |              |
>  | |  bus      | +------------------------>+              |<-> VFIO user
>  | |  driver   | |     probe()/remove()    | vfio_mdev.ko |    APIs
>  | |           | |                         |              |
>  | +-----------+ |                         +--------------+
>  |               |
>  |  MDEV CORE    |
>  |   MODULE      |
>  |   mdev.ko     |
>  | +-----------+ |  mdev_register_device() +--------------+
>  | |           | +<------------------------+              |
>  | |           | |                         |  nvidia.ko   |<-> physical
>  | |           | +------------------------>+              |    device
>  | |           | |        callback         +--------------+
>  | | Physical  | |
>  | |  device   | |  mdev_register_device() +--------------+
>  | | interface | |<------------------------+              |
>  | |           | |                         |  i915.ko     |<-> physical
>  | |           | +------------------------>+              |    device
>  | |           | |        callback         +--------------+
>  | |           | |
>  | |           | |  mdev_register_device() +--------------+
>  | |           | +<------------------------+              |
>  | |           | |                         | ccw_device.ko|<-> physical
>  | |           | +------------------------>+              |    device
>  | |           | |        callback         +--------------+
>  | +-----------+ |
>  +---------------+
> 
> Core driver provides two types of registration interfaces:
> 1. Registration interface for mediated bus driver:
> 
> /**
>   * struct mdev_driver - Mediated device's driver
>   * @name: driver name
>   * @probe: called when new device created
>   * @remove:called when device removed
>   * @driver:device driver structure
>   *
>   **/
> struct mdev_driver {
>          const char *name;
>          int  (*probe)  (struct device *dev);
>          void (*remove) (struct device *dev);
>          struct device_driver    driver;
> };
> 
> Mediated bus driver for mdev device should use this interface to register
> and unregister with core driver respectively:
> 
> int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
> void mdev_unregister_driver(struct mdev_driver *drv);
> 
> Medisted bus driver is responsible to add/delete mediated devices to/from

Medisted -> Mediated

> VFIO group when devices are bound and unbound to the driver.
> 
> 2. Physical device driver interface
> This interface provides vendor driver the set APIs to manage physical
> device related work in its driver. APIs are :
> 
> * dev_attr_groups: attributes of the parent device.
> * mdev_attr_groups: attributes of the mediated device.
> * supported_type_groups: attributes to define supported type. This is
> 			 mandatory field.
> * create: to allocate basic resources in driver for a mediated device.

in 'which driver'? it should be clear to remove 'in driver' here

> * remove: to free resources in driver when mediated device is destroyed.
> * open: open callback of mediated device
> * release: release callback of mediated device
> * read : read emulation callback.
> * write: write emulation callback.
> * mmap: mmap emulation callback.
> * ioctl: ioctl callback.

You only highlight 'mandatory field' for supported_type_groups. What
about other fields? Are all of them optional? Please clarify and also
stay consistent to later code comment.

> 
> Drivers should use these interfaces to register and unregister device to
> mdev core driver respectively:
> 
> extern int  mdev_register_device(struct device *dev,
>                                  const struct parent_ops *ops);
> extern void mdev_unregister_device(struct device *dev);
> 
> There are no locks to serialize above callbacks in mdev driver and
> vfio_mdev driver. If required, vendor driver can have locks to serialize
> above APIs in their driver.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Signed-off-by: Neo Jia <cjia@nvidia.com>
> Change-Id: I73a5084574270b14541c529461ea2f03c292d510
> ---
>  drivers/vfio/Kconfig             |   1 +
>  drivers/vfio/Makefile            |   1 +
>  drivers/vfio/mdev/Kconfig        |  11 ++
>  drivers/vfio/mdev/Makefile       |   4 +
>  drivers/vfio/mdev/mdev_core.c    | 372
> +++++++++++++++++++++++++++++++++++++++
>  drivers/vfio/mdev/mdev_driver.c  | 128 ++++++++++++++
>  drivers/vfio/mdev/mdev_private.h |  41 +++++
>  drivers/vfio/mdev/mdev_sysfs.c   | 296
> +++++++++++++++++++++++++++++++
>  include/linux/mdev.h             | 177 +++++++++++++++++++
>  9 files changed, 1031 insertions(+)
>  create mode 100644 drivers/vfio/mdev/Kconfig
>  create mode 100644 drivers/vfio/mdev/Makefile
>  create mode 100644 drivers/vfio/mdev/mdev_core.c
>  create mode 100644 drivers/vfio/mdev/mdev_driver.c
>  create mode 100644 drivers/vfio/mdev/mdev_private.h
>  create mode 100644 drivers/vfio/mdev/mdev_sysfs.c
>  create mode 100644 include/linux/mdev.h
> 
> diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
> index da6e2ce77495..23eced02aaf6 100644
> --- a/drivers/vfio/Kconfig
> +++ b/drivers/vfio/Kconfig
> @@ -48,4 +48,5 @@ menuconfig VFIO_NOIOMMU
> 
>  source "drivers/vfio/pci/Kconfig"
>  source "drivers/vfio/platform/Kconfig"
> +source "drivers/vfio/mdev/Kconfig"
>  source "virt/lib/Kconfig"
> diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
> index 7b8a31f63fea..4a23c13b6be4 100644
> --- a/drivers/vfio/Makefile
> +++ b/drivers/vfio/Makefile
> @@ -7,3 +7,4 @@ obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) +=
> vfio_iommu_spapr_tce.o
>  obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
>  obj-$(CONFIG_VFIO_PCI) += pci/
>  obj-$(CONFIG_VFIO_PLATFORM) += platform/
> +obj-$(CONFIG_VFIO_MDEV) += mdev/
> diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
> new file mode 100644
> index 000000000000..93addace9a67
> --- /dev/null
> +++ b/drivers/vfio/mdev/Kconfig
> @@ -0,0 +1,11 @@
> +
> +config VFIO_MDEV
> +    tristate "Mediated device driver framework"
> +    depends on VFIO
> +    default n
> +    help
> +        Provides a framework to virtualize devices which don't have SR_IOV
> +	capability built-in.

This statement is not accurate. A device can support SR-IOV, but in the same
time using this mediated technology w/ SR-IOV capability disabled.

> +	See Documentation/vfio-mdev/vfio-mediated-device.txt for more details.
> +
> +        If you don't know what do here, say N.
> diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
> new file mode 100644
> index 000000000000..31bc04801d94
> --- /dev/null
> +++ b/drivers/vfio/mdev/Makefile
> @@ -0,0 +1,4 @@
> +
> +mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
> +
> +obj-$(CONFIG_VFIO_MDEV) += mdev.o
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> new file mode 100644
> index 000000000000..7db5ec164aeb
> --- /dev/null
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -0,0 +1,372 @@
> +/*
> + * Mediated device Core Driver
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <linux/uuid.h>
> +#include <linux/sysfs.h>
> +#include <linux/mdev.h>
> +
> +#include "mdev_private.h"
> +
> +#define DRIVER_VERSION		"0.1"
> +#define DRIVER_AUTHOR		"NVIDIA Corporation"
> +#define DRIVER_DESC		"Mediated device Core Driver"
> +
> +static LIST_HEAD(parent_list);
> +static DEFINE_MUTEX(parent_list_lock);
> +static struct class_compat *mdev_bus_compat_class;
> +
> +static int _find_mdev_device(struct device *dev, void *data)
> +{
> +	struct mdev_device *mdev;
> +
> +	if (!dev_is_mdev(dev))
> +		return 0;
> +
> +	mdev = to_mdev_device(dev);
> +
> +	if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static struct mdev_device *__find_mdev_device(struct parent_device *parent,
> +					      uuid_le uuid)

parent_find_mdev_device?

> +{
> +	struct device *dev;
> +
> +	dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
> +	if (!dev)
> +		return NULL;
> +
> +	put_device(dev);
> +
> +	return to_mdev_device(dev);
> +}
> +
> +/* Should be called holding parent_list_lock */
> +static struct parent_device *__find_parent_device(struct device *dev)
> +{
> +	struct parent_device *parent;
> +
> +	list_for_each_entry(parent, &parent_list, next) {
> +		if (parent->dev == dev)
> +			return parent;
> +	}
> +	return NULL;
> +}
> +
> +static void mdev_release_parent(struct kref *kref)
> +{
> +	struct parent_device *parent = container_of(kref, struct parent_device,
> +						    ref);
> +	struct device *dev = parent->dev;
> +
> +	kfree(parent);
> +	put_device(dev);
> +}
> +
> +static
> +inline struct parent_device *mdev_get_parent(struct parent_device *parent)
> +{
> +	if (parent)
> +		kref_get(&parent->ref);
> +
> +	return parent;
> +}
> +
> +static inline void mdev_put_parent(struct parent_device *parent)
> +{
> +	if (parent)
> +		kref_put(&parent->ref, mdev_release_parent);
> +}
> +
> +static int mdev_device_create_ops(struct kobject *kobj,
> +				  struct mdev_device *mdev)
> +{
> +	struct parent_device *parent = mdev->parent;
> +	int ret;
> +
> +	ret = parent->ops->create(kobj, mdev);
> +	if (ret)
> +		return ret;
> +
> +	ret = sysfs_create_groups(&mdev->dev.kobj,
> +				  parent->ops->mdev_attr_groups);
> +	if (ret)
> +		parent->ops->remove(mdev);
> +
> +	return ret;
> +}
> +
> +static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
> +{


Can you add some comment here about when force_remove may be expected
here, which would help others understand immediately instead of walking through
the whole patch set?


> +	struct parent_device *parent = mdev->parent;
> +	int ret;
> +
> +	/*
> +	 * Vendor driver can return error if VMM or userspace application is
> +	 * using this mdev device.
> +	 */
> +	ret = parent->ops->remove(mdev);

what about passing force_remove flag to remove callback, so vendor driver
can decide whether any force cleanup required?

> +	if (ret && !force_remove)
> +		return -EBUSY;
> +
> +	sysfs_remove_groups(&mdev->dev.kobj, parent->ops->mdev_attr_groups);
> +	return 0;
> +}
> +
> +static int mdev_device_remove_cb(struct device *dev, void *data)
> +{
> +	return mdev_device_remove(dev, data ? *(bool *)data : true);
> +}
> +
> +/*
> + * mdev_register_device : Register a device
> + * @dev: device structure representing parent device.
> + * @ops: Parent device operation structure to be registered.
> + *
> + * Add device to list of registered parent devices.
> + * Returns a negative value on error, otherwise 0.
> + */
> +int mdev_register_device(struct device *dev, const struct parent_ops *ops)
> +{
> +	int ret = 0;
> +	struct parent_device *parent;
> +
> +	/* check for mandatory ops */
> +	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
> +		return -EINVAL;
> +
> +	dev = get_device(dev);
> +	if (!dev)
> +		return -EINVAL;
> +
> +	mutex_lock(&parent_list_lock);
> +
> +	/* Check for duplicate */
> +	parent = __find_parent_device(dev);
> +	if (parent) {
> +		ret = -EEXIST;
> +		goto add_dev_err;
> +	}
> +
> +	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
> +	if (!parent) {
> +		ret = -ENOMEM;
> +		goto add_dev_err;
> +	}
> +
> +	kref_init(&parent->ref);
> +
> +	parent->dev = dev;
> +	parent->ops = ops;
> +
> +	ret = parent_create_sysfs_files(parent);
> +	if (ret) {
> +		mutex_unlock(&parent_list_lock);
> +		mdev_put_parent(parent);
> +		return ret;
> +	}
> +
> +	ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
> +	if (ret)
> +		dev_warn(dev, "Failed to create compatibility class link\n");
> +
> +	list_add(&parent->next, &parent_list);
> +	mutex_unlock(&parent_list_lock);
> +
> +	dev_info(dev, "MDEV: Registered\n");
> +	return 0;
> +
> +add_dev_err:
> +	mutex_unlock(&parent_list_lock);
> +	put_device(dev);
> +	return ret;
> +}
> +EXPORT_SYMBOL(mdev_register_device);
> +
> +/*
> + * mdev_unregister_device : Unregister a parent device
> + * @dev: device structure representing parent device.
> + *
> + * Remove device from list of registered parent devices. Give a chance to free
> + * existing mediated devices for given device.
> + */
> +
> +void mdev_unregister_device(struct device *dev)
> +{
> +	struct parent_device *parent;
> +	bool force_remove = true;
> +
> +	mutex_lock(&parent_list_lock);
> +	parent = __find_parent_device(dev);
> +
> +	if (!parent) {
> +		mutex_unlock(&parent_list_lock);
> +		return;
> +	}
> +	dev_info(dev, "MDEV: Unregistering\n");
> +
> +	/*
> +	 * Remove parent from the list and remove "mdev_supported_types"
> +	 * sysfs files so that no new mediated device could be
> +	 * created for this parent
> +	 */
> +	list_del(&parent->next);
> +	parent_remove_sysfs_files(parent);
> +
> +	mutex_unlock(&parent_list_lock);
> +
> +	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
> +
> +	device_for_each_child(dev, (void *)&force_remove,
> +			      mdev_device_remove_cb);
> +	mdev_put_parent(parent);
> +}
> +EXPORT_SYMBOL(mdev_unregister_device);
> +
> +static void mdev_device_release(struct device *dev)
> +{
> +	struct mdev_device *mdev = to_mdev_device(dev);
> +
> +	dev_dbg(&mdev->dev, "MDEV: destroying\n");
> +	kfree(mdev);
> +}
> +
> +int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
> +{
> +	int ret;
> +	struct mdev_device *mdev;
> +	struct parent_device *parent;
> +	struct mdev_type *type = to_mdev_type(kobj);
> +
> +	parent = mdev_get_parent(type->parent);
> +	if (!parent)
> +		return -EINVAL;
> +
> +	/* Check for duplicate */
> +	mdev = __find_mdev_device(parent, uuid);
> +	if (mdev) {
> +		ret = -EEXIST;
> +		goto create_err;
> +	}
> +
> +	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
> +	if (!mdev) {
> +		ret = -ENOMEM;
> +		goto create_err;
> +	}
> +
> +	memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
> +	mdev->parent = parent;
> +	kref_init(&mdev->ref);
> +
> +	mdev->dev.parent  = dev;
> +	mdev->dev.bus     = &mdev_bus_type;
> +	mdev->dev.release = mdev_device_release;
> +	dev_set_name(&mdev->dev, "%pUl", uuid.b);
> +
> +	ret = device_register(&mdev->dev);
> +	if (ret) {
> +		put_device(&mdev->dev);
> +		goto create_err;
> +	}
> +
> +	ret = mdev_device_create_ops(kobj, mdev);
> +	if (ret)
> +		goto create_failed;
> +
> +	ret = mdev_create_sysfs_files(&mdev->dev, type);
> +	if (ret) {
> +		mdev_device_remove_ops(mdev, true);
> +		goto create_failed;
> +	}
> +
> +	mdev->type_kobj = kobj;
> +	dev_dbg(&mdev->dev, "MDEV: created\n");
> +
> +	return ret;
> +
> +create_failed:
> +	device_unregister(&mdev->dev);
> +
> +create_err:
> +	mdev_put_parent(parent);
> +	return ret;
> +}
> +
> +int mdev_device_remove(struct device *dev, bool force_remove)
> +{
> +	struct mdev_device *mdev;
> +	struct parent_device *parent;
> +	struct mdev_type *type;
> +	int ret = 0;
> +
> +	if (!dev_is_mdev(dev))
> +		return 0;
> +
> +	mdev = to_mdev_device(dev);
> +	parent = mdev->parent;
> +	type = to_mdev_type(mdev->type_kobj);
> +
> +	ret = mdev_device_remove_ops(mdev, force_remove);
> +	if (ret)
> +		return ret;
> +
> +	mdev_remove_sysfs_files(dev, type);
> +	device_unregister(dev);
> +	mdev_put_parent(parent);
> +	return ret;
> +}
> +
> +static int __init mdev_init(void)
> +{
> +	int ret;
> +
> +	ret = mdev_bus_register();
> +	if (ret) {
> +		pr_err("Failed to register mdev bus\n");
> +		return ret;
> +	}
> +
> +	mdev_bus_compat_class = class_compat_register("mdev_bus");
> +	if (!mdev_bus_compat_class) {
> +		mdev_bus_unregister();
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * Attempt to load known vfio_mdev.  This gives us a working environment
> +	 * without the user needing to explicitly load vfio_mdev driver.
> +	 */
> +	request_module_nowait("vfio_mdev");
> +
> +	return ret;
> +}
> +
> +static void __exit mdev_exit(void)
> +{
> +	class_compat_unregister(mdev_bus_compat_class);
> +	mdev_bus_unregister();
> +}
> +
> +module_init(mdev_init)
> +module_exit(mdev_exit)
> +
> +MODULE_VERSION(DRIVER_VERSION);
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR(DRIVER_AUTHOR);
> +MODULE_DESCRIPTION(DRIVER_DESC);
> diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
> new file mode 100644
> index 000000000000..7768ef87f528
> --- /dev/null
> +++ b/drivers/vfio/mdev/mdev_driver.c
> @@ -0,0 +1,128 @@
> +/*
> + * MDEV driver
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/device.h>
> +#include <linux/iommu.h>
> +#include <linux/mdev.h>
> +
> +#include "mdev_private.h"
> +
> +static int mdev_attach_iommu(struct mdev_device *mdev)
> +{
> +	int ret;
> +	struct iommu_group *group;
> +
> +	group = iommu_group_alloc();
> +	if (IS_ERR(group)) {
> +		dev_err(&mdev->dev, "MDEV: failed to allocate group!\n");
> +		return PTR_ERR(group);
> +	}
> +
> +	ret = iommu_group_add_device(group, &mdev->dev);
> +	if (ret) {
> +		dev_err(&mdev->dev, "MDEV: failed to add dev to group!\n");
> +		goto attach_fail;
> +	}
> +
> +	dev_info(&mdev->dev, "MDEV: group_id = %d\n",
> +				 iommu_group_id(group));
> +attach_fail:
> +	iommu_group_put(group);
> +	return ret;
> +}
> +
> +static void mdev_detach_iommu(struct mdev_device *mdev)
> +{
> +	iommu_group_remove_device(&mdev->dev);
> +	dev_info(&mdev->dev, "MDEV: detaching iommu\n");
> +}
> +
> +static int mdev_probe(struct device *dev)
> +{
> +	struct mdev_driver *drv = to_mdev_driver(dev->driver);
> +	struct mdev_device *mdev = to_mdev_device(dev);
> +	int ret;
> +
> +	ret = mdev_attach_iommu(mdev);
> +	if (ret) {
> +		dev_err(dev, "Failed to attach IOMMU\n");
> +		return ret;
> +	}
> +
> +	if (drv && drv->probe)
> +		ret = drv->probe(dev);
> +
> +	if (ret)
> +		mdev_detach_iommu(mdev);
> +
> +	return ret;
> +}
> +
> +static int mdev_remove(struct device *dev)
> +{
> +	struct mdev_driver *drv = to_mdev_driver(dev->driver);
> +	struct mdev_device *mdev = to_mdev_device(dev);
> +
> +	if (drv && drv->remove)
> +		drv->remove(dev);
> +
> +	mdev_detach_iommu(mdev);
> +
> +	return 0;
> +}
> +
> +struct bus_type mdev_bus_type = {
> +	.name		= "mdev",
> +	.probe		= mdev_probe,
> +	.remove		= mdev_remove,
> +};
> +EXPORT_SYMBOL_GPL(mdev_bus_type);
> +
> +/*
> + * mdev_register_driver - register a new MDEV driver
> + * @drv: the driver to register
> + * @owner: module owner of driver to be registered
> + *
> + * Returns a negative value on error, otherwise 0.
> + */
> +int mdev_register_driver(struct mdev_driver *drv, struct module *owner)
> +{
> +	/* initialize common driver fields */
> +	drv->driver.name = drv->name;
> +	drv->driver.bus = &mdev_bus_type;
> +	drv->driver.owner = owner;
> +
> +	/* register with core */
> +	return driver_register(&drv->driver);
> +}
> +EXPORT_SYMBOL(mdev_register_driver);
> +
> +/*
> + * mdev_unregister_driver - unregister MDEV driver
> + * @drv: the driver to unregister
> + *
> + */
> +void mdev_unregister_driver(struct mdev_driver *drv)
> +{
> +	driver_unregister(&drv->driver);
> +}
> +EXPORT_SYMBOL(mdev_unregister_driver);
> +
> +int mdev_bus_register(void)
> +{
> +	return bus_register(&mdev_bus_type);
> +}
> +
> +void mdev_bus_unregister(void)
> +{
> +	bus_unregister(&mdev_bus_type);
> +}
> diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
> new file mode 100644
> index 000000000000..000c93fcfdbd
> --- /dev/null
> +++ b/drivers/vfio/mdev/mdev_private.h
> @@ -0,0 +1,41 @@
> +/*
> + * Mediated device interal definitions
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef MDEV_PRIVATE_H
> +#define MDEV_PRIVATE_H
> +
> +int  mdev_bus_register(void);
> +void mdev_bus_unregister(void);
> +
> +struct mdev_type {
> +	struct kobject kobj;
> +	struct kobject *devices_kobj;
> +	struct parent_device *parent;
> +	struct list_head next;
> +	struct attribute_group *group;
> +};
> +
> +#define to_mdev_type_attr(_attr)	\
> +	container_of(_attr, struct mdev_type_attribute, attr)
> +#define to_mdev_type(_kobj)		\
> +	container_of(_kobj, struct mdev_type, kobj)
> +
> +int  parent_create_sysfs_files(struct parent_device *parent);
> +void parent_remove_sysfs_files(struct parent_device *parent);
> +
> +int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type);
> +void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
> +
> +int  mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid);
> +int  mdev_device_remove(struct device *dev, bool force_remove);
> +
> +#endif /* MDEV_PRIVATE_H */
> diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
> new file mode 100644
> index 000000000000..426e35cf79d0
> --- /dev/null
> +++ b/drivers/vfio/mdev/mdev_sysfs.c
> @@ -0,0 +1,296 @@
> +/*
> + * File attributes for Mediated devices
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/sysfs.h>
> +#include <linux/ctype.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <linux/uuid.h>
> +#include <linux/mdev.h>
> +
> +#include "mdev_private.h"
> +
> +/* Static functions */
> +
> +static ssize_t mdev_type_attr_show(struct kobject *kobj,
> +				     struct attribute *__attr, char *buf)
> +{
> +	struct mdev_type_attribute *attr = to_mdev_type_attr(__attr);
> +	struct mdev_type *type = to_mdev_type(kobj);
> +	ssize_t ret = -EIO;
> +
> +	if (attr->show)
> +		ret = attr->show(kobj, type->parent->dev, buf);
> +	return ret;
> +}
> +
> +static ssize_t mdev_type_attr_store(struct kobject *kobj,
> +				      struct attribute *__attr,
> +				      const char *buf, size_t count)
> +{
> +	struct mdev_type_attribute *attr = to_mdev_type_attr(__attr);
> +	struct mdev_type *type = to_mdev_type(kobj);
> +	ssize_t ret = -EIO;
> +
> +	if (attr->store)
> +		ret = attr->store(&type->kobj, type->parent->dev, buf, count);
> +	return ret;
> +}
> +
> +static const struct sysfs_ops mdev_type_sysfs_ops = {
> +	.show = mdev_type_attr_show,
> +	.store = mdev_type_attr_store,
> +};
> +
> +static ssize_t create_store(struct kobject *kobj, struct device *dev,
> +			    const char *buf, size_t count)
> +{
> +	char *str;
> +	uuid_le uuid;
> +	int ret;
> +
> +	if (count < UUID_STRING_LEN)
> +		return -EINVAL;
> +
> +	str = kstrndup(buf, count, GFP_KERNEL);
> +	if (!str)
> +		return -ENOMEM;
> +
> +	ret = uuid_le_to_bin(str, &uuid);
> +	if (!ret) {
> +
> +		ret = mdev_device_create(kobj, dev, uuid);
> +		if (ret)
> +			pr_err("mdev_create: Failed to create mdev device\n");
> +		else
> +			ret = count;
> +	}
> +
> +	kfree(str);
> +	return ret;
> +}
> +
> +MDEV_TYPE_ATTR_WO(create);
> +
> +static void mdev_type_release(struct kobject *kobj)
> +{
> +	struct mdev_type *type = to_mdev_type(kobj);
> +
> +	pr_debug("Releasing group %s\n", kobj->name);
> +	kfree(type);
> +}
> +
> +static struct kobj_type mdev_type_ktype = {
> +	.sysfs_ops = &mdev_type_sysfs_ops,
> +	.release = mdev_type_release,
> +};
> +
> +struct mdev_type *add_mdev_supported_type(struct parent_device *parent,
> +					  struct attribute_group *group)
> +{
> +	struct mdev_type *type;
> +	int ret;
> +
> +	if (!group->name) {
> +		pr_err("%s: Type name empty!\n", __func__);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	type = kzalloc(sizeof(*type), GFP_KERNEL);
> +	if (!type)
> +		return ERR_PTR(-ENOMEM);
> +
> +	type->kobj.kset = parent->mdev_types_kset;
> +
> +	ret = kobject_init_and_add(&type->kobj, &mdev_type_ktype, NULL,
> +				   "%s-%s", dev_driver_string(parent->dev),
> +				   group->name);
> +	if (ret) {
> +		kfree(type);
> +		return ERR_PTR(ret);
> +	}
> +
> +	ret = sysfs_create_file(&type->kobj, &mdev_type_attr_create.attr);
> +	if (ret)
> +		goto attr_create_failed;
> +
> +	type->devices_kobj = kobject_create_and_add("devices", &type->kobj);
> +	if (!type->devices_kobj) {
> +		ret = -ENOMEM;
> +		goto attr_devices_failed;
> +	}
> +
> +	ret = sysfs_create_files(&type->kobj,
> +				 (const struct attribute **)group->attrs);
> +	if (ret) {
> +		ret = -ENOMEM;
> +		goto attrs_failed;
> +	}
> +
> +	type->group = group;
> +	type->parent = parent;
> +	return type;
> +
> +attrs_failed:
> +	kobject_put(type->devices_kobj);
> +attr_devices_failed:
> +	sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
> +attr_create_failed:
> +	kobject_del(&type->kobj);
> +	kobject_put(&type->kobj);
> +	return ERR_PTR(ret);
> +}
> +
> +static void remove_mdev_supported_type(struct mdev_type *type)
> +{
> +	sysfs_remove_files(&type->kobj,
> +			   (const struct attribute **)type->group->attrs);
> +	kobject_put(type->devices_kobj);
> +	sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
> +	kobject_del(&type->kobj);
> +	kobject_put(&type->kobj);
> +}
> +
> +static int add_mdev_supported_type_groups(struct parent_device *parent)
> +{
> +	int i;
> +
> +	for (i = 0; parent->ops->supported_type_groups[i]; i++) {
> +		struct mdev_type *type;
> +
> +		type = add_mdev_supported_type(parent,
> +					parent->ops->supported_type_groups[i]);
> +		if (IS_ERR(type)) {
> +			struct mdev_type *ltype, *tmp;
> +
> +			list_for_each_entry_safe(ltype, tmp, &parent->type_list,
> +						  next) {
> +				list_del(&ltype->next);
> +				remove_mdev_supported_type(ltype);
> +			}
> +			return PTR_ERR(type);
> +		}
> +		list_add(&type->next, &parent->type_list);
> +	}
> +	return 0;
> +}
> +
> +/* mdev sysfs Functions */
> +
> +void parent_remove_sysfs_files(struct parent_device *parent)
> +{
> +	struct mdev_type *type, *tmp;
> +
> +	list_for_each_entry_safe(type, tmp, &parent->type_list, next) {
> +		list_del(&type->next);
> +		remove_mdev_supported_type(type);
> +	}
> +
> +	sysfs_remove_groups(&parent->dev->kobj, parent->ops->dev_attr_groups);
> +	kset_unregister(parent->mdev_types_kset);
> +}
> +
> +int parent_create_sysfs_files(struct parent_device *parent)
> +{
> +	int ret;
> +
> +	parent->mdev_types_kset = kset_create_and_add("mdev_supported_types",
> +					       NULL, &parent->dev->kobj);
> +
> +	if (!parent->mdev_types_kset)
> +		return -ENOMEM;
> +
> +	INIT_LIST_HEAD(&parent->type_list);
> +
> +	ret = sysfs_create_groups(&parent->dev->kobj,
> +				  parent->ops->dev_attr_groups);
> +	if (ret)
> +		goto create_err;
> +
> +	ret = add_mdev_supported_type_groups(parent);
> +	if (ret)
> +		sysfs_remove_groups(&parent->dev->kobj,
> +				    parent->ops->dev_attr_groups);
> +	else
> +		return ret;
> +
> +create_err:
> +	kset_unregister(parent->mdev_types_kset);
> +	return ret;
> +}
> +
> +static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
> +			    const char *buf, size_t count)
> +{
> +	unsigned long val;
> +
> +	if (kstrtoul(buf, 0, &val) < 0)
> +		return -EINVAL;
> +
> +	if (val && device_remove_file_self(dev, attr)) {
> +		int ret;
> +
> +		ret = mdev_device_remove(dev, false);
> +		if (ret) {
> +			device_create_file(dev, attr);
> +			return ret;
> +		}
> +	}
> +
> +	return count;
> +}
> +
> +static DEVICE_ATTR_WO(remove);
> +
> +static const struct attribute *mdev_device_attrs[] = {
> +	&dev_attr_remove.attr,
> +	NULL,
> +};
> +
> +int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
> +{
> +	int ret;
> +
> +	ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
> +	if (ret) {
> +		pr_err("Failed to create remove sysfs entry\n");
> +		return ret;
> +	}
> +
> +	ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
> +	if (ret) {
> +		pr_err("Failed to create symlink in types\n");

looks wrong place...

> +		goto device_link_failed;
> +	}
> +
> +	ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
> +	if (ret) {
> +		pr_err("Failed to create symlink in device directory\n");

exchange with above.

> +		goto type_link_failed;
> +	}
> +
> +	return ret;
> +
> +type_link_failed:
> +	sysfs_remove_link(type->devices_kobj, dev_name(dev));
> +device_link_failed:
> +	sysfs_remove_files(&dev->kobj, mdev_device_attrs);
> +	return ret;
> +}
> +
> +void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type)
> +{
> +	sysfs_remove_link(&dev->kobj, "mdev_type");
> +	sysfs_remove_link(type->devices_kobj, dev_name(dev));
> +	sysfs_remove_files(&dev->kobj, mdev_device_attrs);
> +
> +}
> diff --git a/include/linux/mdev.h b/include/linux/mdev.h
> new file mode 100644
> index 000000000000..727209b2a67f
> --- /dev/null
> +++ b/include/linux/mdev.h
> @@ -0,0 +1,177 @@
> +/*
> + * Mediated device definition
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <cjia@nvidia.com>
> + *	       Kirti Wankhede <kwankhede@nvidia.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef MDEV_H
> +#define MDEV_H
> +
> +#include <uapi/linux/vfio.h>
> +
> +struct parent_device;
> +
> +/* Mediated device */
> +struct mdev_device {
> +	struct device		dev;
> +	struct parent_device	*parent;
> +	uuid_le			uuid;
> +	void			*driver_data;
> +
> +	/* internal only */
> +	struct kref		ref;
> +	struct list_head	next;
> +	struct kobject		*type_kobj;
> +};
> +
> +
> +/**
> + * struct parent_ops - Structure to be registered for each parent device to
> + * register the device to mdev module.
> + *
> + * @owner:		The module owner.
> + * @dev_attr_groups:	Attributes of the parent device.
> + * @mdev_attr_groups:	Attributes of the mediated device.
> + * @supported_type_groups: Attributes to define supported types. It is mandatory
> + *			to provide supported types.
> + * @create:		Called to allocate basic resources in parent device's
> + *			driver for a particular mediated device. It is
> + *			mandatory to provide create ops.
> + *			@kobj: kobject of type for which 'create' is called.
> + *			@mdev: mdev_device structure on of mediated device
> + *			      that is being created
> + *			Returns integer: success (0) or error (< 0)
> + * @remove:		Called to free resources in parent device's driver for a
> + *			a mediated device. It is mandatory to provide 'remove'
> + *			ops.
> + *			@mdev: mdev_device device structure which is being
> + *			       destroyed
> + *			Returns integer: success (0) or error (< 0)
> + * @open:		Open mediated device.
> + *			@mdev: mediated device.
> + *			Returns integer: success (0) or error (< 0)
> + * @release:		release mediated device
> + *			@mdev: mediated device.
> + * @read:		Read emulation callback
> + *			@mdev: mediated device structure
> + *			@buf: read buffer
> + *			@count: number of bytes to read
> + *			@ppos: address.
> + *			Retuns number on bytes read on success or error.
> + * @write:		Write emulation callback
> + *			@mdev: mediated device structure
> + *			@buf: write buffer
> + *			@count: number of bytes to be written
> + *			@ppos: address.
> + *			Retuns number on bytes written on success or error.
> + * @ioctl:		IOCTL callback
> + *			@mdev: mediated device structure
> + *			@cmd: mediated device structure
> + *			@arg: mediated device structure
> + * @mmap:		mmap callback
> + * Parent device that support mediated device should be registered with mdev
> + * module with parent_ops structure.
> + */
> +
> +struct parent_ops {
> +	struct module   *owner;
> +	const struct attribute_group **dev_attr_groups;
> +	const struct attribute_group **mdev_attr_groups;
> +	struct attribute_group **supported_type_groups;
> +
> +	int     (*create)(struct kobject *kobj, struct mdev_device *mdev);
> +	int     (*remove)(struct mdev_device *mdev);
> +	int     (*open)(struct mdev_device *mdev);
> +	void    (*release)(struct mdev_device *mdev);
> +	ssize_t (*read)(struct mdev_device *mdev, char __user *buf,
> +			size_t count, loff_t *ppos);
> +	ssize_t (*write)(struct mdev_device *mdev, const char __user *buf,
> +			 size_t count, loff_t *ppos);
> +	ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd,
> +			 unsigned long arg);
> +	int	(*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma);
> +};
> +
> +/* Parent Device */
> +struct parent_device {
> +	struct device		*dev;
> +	const struct parent_ops	*ops;
> +
> +	/* internal */
> +	struct kref		ref;
> +	struct list_head	next;
> +	struct kset *mdev_types_kset;
> +	struct list_head	type_list;
> +};
> +
> +/* interface for exporting mdev supported type attributes */
> +struct mdev_type_attribute {
> +	struct attribute attr;
> +	ssize_t (*show)(struct kobject *kobj, struct device *dev, char *buf);
> +	ssize_t (*store)(struct kobject *kobj, struct device *dev,
> +			 const char *buf, size_t count);
> +};
> +
> +#define MDEV_TYPE_ATTR(_name, _mode, _show, _store)		\
> +struct mdev_type_attribute mdev_type_attr_##_name =		\
> +	__ATTR(_name, _mode, _show, _store)
> +#define MDEV_TYPE_ATTR_RW(_name) \
> +	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RW(_name)
> +#define MDEV_TYPE_ATTR_RO(_name) \
> +	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name)
> +#define MDEV_TYPE_ATTR_WO(_name) \
> +	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name)
> +
> +/**
> + * struct mdev_driver - Mediated device driver
> + * @name: driver name
> + * @probe: called when new device created
> + * @remove: called when device removed
> + * @driver: device driver structure
> + *
> + **/
> +struct mdev_driver {
> +	const char *name;
> +	int  (*probe)(struct device *dev);
> +	void (*remove)(struct device *dev);
> +	struct device_driver driver;
> +};
> +
> +static inline struct mdev_driver *to_mdev_driver(struct device_driver *drv)
> +{
> +	return drv ? container_of(drv, struct mdev_driver, driver) : NULL;
> +}
> +
> +static inline struct mdev_device *to_mdev_device(struct device *dev)
> +{
> +	return dev ? container_of(dev, struct mdev_device, dev) : NULL;
> +}
> +
> +static inline void *mdev_get_drvdata(struct mdev_device *mdev)
> +{
> +	return mdev->driver_data;
> +}
> +
> +static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data)
> +{
> +	mdev->driver_data = data;
> +}
> +
> +extern struct bus_type mdev_bus_type;
> +
> +#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type)
> +
> +extern int  mdev_register_device(struct device *dev,
> +				 const struct parent_ops *ops);
> +extern void mdev_unregister_device(struct device *dev);
> +
> +extern int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
> +extern void mdev_unregister_driver(struct mdev_driver *drv);
> +
> +#endif /* MDEV_H */
> --
> 2.7.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kirti Wankhede Oct. 26, 2016, 2:58 p.m. UTC | #8
>> Medisted bus driver is responsible to add/delete mediated devices to/from
> 
> Medisted -> Mediated
>

Thanks for pointing out the typeo. Correcting it.


>> VFIO group when devices are bound and unbound to the driver.
>>
>> 2. Physical device driver interface
>> This interface provides vendor driver the set APIs to manage physical
>> device related work in its driver. APIs are :
>>
>> * dev_attr_groups: attributes of the parent device.
>> * mdev_attr_groups: attributes of the mediated device.
>> * supported_type_groups: attributes to define supported type. This is
>> 			 mandatory field.
>> * create: to allocate basic resources in driver for a mediated device.
> 
> in 'which driver'? it should be clear to remove 'in driver' here
> 
>> * remove: to free resources in driver when mediated device is destroyed.
>> * open: open callback of mediated device
>> * release: release callback of mediated device
>> * read : read emulation callback.
>> * write: write emulation callback.
>> * mmap: mmap emulation callback.
>> * ioctl: ioctl callback.
> 
> You only highlight 'mandatory field' for supported_type_groups. What
> about other fields? Are all of them optional? Please clarify and also
> stay consistent to later code comment.
> 

'create' and 'remove' are mandatory. Updating the description here. Rest
all are not cross-checked in mdev core driver, like 'create' and
'remove' but yes rest are optional. If vendor driver don't want to
support emulated region they don't need read/write callbacks. Similarly
if vendor driver don't want to support mmap region, they don't need mmap
callback.

Code comments are consistent with this description.

...
>> +
>> +config VFIO_MDEV
>> +    tristate "Mediated device driver framework"
>> +    depends on VFIO
>> +    default n
>> +    help
>> +        Provides a framework to virtualize devices which don't have SR_IOV
>> +	capability built-in.
> 
> This statement is not accurate. A device can support SR-IOV, but in the same
> time using this mediated technology w/ SR-IOV capability disabled.
> 

If SR-IOV is supported why would user use this framework? SR-IOV would
give better performance.

...
>> +
>> +static struct mdev_device *__find_mdev_device(struct parent_device *parent,
>> +					      uuid_le uuid)
> 
> parent_find_mdev_device?
> 

This function search for mdev device with given UUID, so I think its
consistent what we have below for parent, __find_parent_device().

...

>> +static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
>> +{
> 
> 
> Can you add some comment here about when force_remove may be expected
> here, which would help others understand immediately instead of walking through
> the whole patch set?
>


mdev_device_remove_ops gets called from sysfs's 'remove' and when parent
device is being unregistered from mdev device framework.
- 'force_remove' is set to 'false' when called from sysfs's 'remove'
which indicates that if the mdev device is active, used by VMM or
userspace application, vendor driver could return error then don't
remove the device.
- 'force_remove' is set to 'true' when called from
mdev_unregister_device() which indicate that parent device is being
removed from mdev device framework so remove mdev device forcefully.

> 
>> +	struct parent_device *parent = mdev->parent;
>> +	int ret;
>> +
>> +	/*
>> +	 * Vendor driver can return error if VMM or userspace application is
>> +	 * using this mdev device.
>> +	 */
>> +	ret = parent->ops->remove(mdev);
> 
> what about passing force_remove flag to remove callback, so vendor driver
> can decide whether any force cleanup required?
>

'remove' getting called from sysfs is asynchronous, so vendor driver can
retrun failure in that case if vendor driver finds that mdev device is
being actively used.

mdev_unregister_device() is going to be called from vendor driver itself
when device is being unbound or driver is being unloaded. In this case
vendor driver can identify itself that its in its own teardown path.

So I feel there is no need to pass force_remove flag to 'remove' callback.



>> +int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
>> +{
>> +	int ret;
>> +
>> +	ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
>> +	if (ret) {
>> +		pr_err("Failed to create remove sysfs entry\n");
>> +		return ret;
>> +	}
>> +
>> +	ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
>> +	if (ret) {
>> +		pr_err("Failed to create symlink in types\n");
> 
> looks wrong place...
>

No, this is correct. Above function creates symlink in
mdev_supported_types/<type>/devices directory.

>> +		goto device_link_failed;
>> +	}
>> +
>> +	ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
>> +	if (ret) {
>> +		pr_err("Failed to create symlink in device directory\n");
> 
> exchange with above.
> 
Again this is also correct. Above creates 'mdev_type' symlink in mdev
device's directory.

Although these are correct, removing these error prints. You can find it
from its return type or if sysfs functions through warnings.

Kirti.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jike Song Oct. 27, 2016, 5:56 a.m. UTC | #9
On 10/21/2016 01:12 AM, Alex Williamson wrote:
> On Thu, 20 Oct 2016 15:23:53 +0800
> Jike Song <jike.song@intel.com> wrote:
> 
>> On 10/18/2016 05:22 AM, Kirti Wankhede wrote:
>>> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
>>> new file mode 100644
>>> index 000000000000..7db5ec164aeb
>>> --- /dev/null
>>> +++ b/drivers/vfio/mdev/mdev_core.c
>>> @@ -0,0 +1,372 @@
>>> +/*
>>> + * Mediated device Core Driver
>>> + *
>>> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
>>> + *     Author: Neo Jia <cjia@nvidia.com>
>>> + *	       Kirti Wankhede <kwankhede@nvidia.com>
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License version 2 as
>>> + * published by the Free Software Foundation.
>>> + */
>>> +
>>> +#include <linux/module.h>
>>> +#include <linux/device.h>
>>> +#include <linux/slab.h>
>>> +#include <linux/uuid.h>
>>> +#include <linux/sysfs.h>
>>> +#include <linux/mdev.h>
>>> +
>>> +#include "mdev_private.h"
>>> +
>>> +#define DRIVER_VERSION		"0.1"
>>> +#define DRIVER_AUTHOR		"NVIDIA Corporation"
>>> +#define DRIVER_DESC		"Mediated device Core Driver"
>>> +
>>> +static LIST_HEAD(parent_list);
>>> +static DEFINE_MUTEX(parent_list_lock);
>>> +static struct class_compat *mdev_bus_compat_class;
>>> +  
>>
>>> +
>>> +/*
>>> + * mdev_register_device : Register a device
>>> + * @dev: device structure representing parent device.
>>> + * @ops: Parent device operation structure to be registered.
>>> + *
>>> + * Add device to list of registered parent devices.
>>> + * Returns a negative value on error, otherwise 0.
>>> + */
>>> +int mdev_register_device(struct device *dev, const struct parent_ops *ops)
>>> +{
>>> +	int ret = 0;
>>> +	struct parent_device *parent;
>>> +
>>> +	/* check for mandatory ops */
>>> +	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
>>> +		return -EINVAL;
>>> +
>>> +	dev = get_device(dev);
>>> +	if (!dev)
>>> +		return -EINVAL;
>>> +
>>> +	mutex_lock(&parent_list_lock);
>>> +
>>> +	/* Check for duplicate */
>>> +	parent = __find_parent_device(dev);
>>> +	if (parent) {
>>> +		ret = -EEXIST;
>>> +		goto add_dev_err;
>>> +	}
>>> +
>>> +	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
>>> +	if (!parent) {
>>> +		ret = -ENOMEM;
>>> +		goto add_dev_err;
>>> +	}
>>> +
>>> +	kref_init(&parent->ref);
>>> +
>>> +	parent->dev = dev;
>>> +	parent->ops = ops;
>>> +
>>> +	ret = parent_create_sysfs_files(parent);
>>> +	if (ret) {
>>> +		mutex_unlock(&parent_list_lock);
>>> +		mdev_put_parent(parent);
>>> +		return ret;
>>> +	}
>>> +
>>> +	ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
>>> +	if (ret)
>>> +		dev_warn(dev, "Failed to create compatibility class link\n");
>>> +
>>> +	list_add(&parent->next, &parent_list);
>>> +	mutex_unlock(&parent_list_lock);
>>> +
>>> +	dev_info(dev, "MDEV: Registered\n");
>>> +	return 0;
>>> +
>>> +add_dev_err:
>>> +	mutex_unlock(&parent_list_lock);
>>> +	put_device(dev);
>>> +	return ret;
>>> +}
>>> +EXPORT_SYMBOL(mdev_register_device);  
>>
>>> +static int __init mdev_init(void)
>>> +{
>>> +	int ret;
>>> +
>>> +	ret = mdev_bus_register();
>>> +	if (ret) {
>>> +		pr_err("Failed to register mdev bus\n");
>>> +		return ret;
>>> +	}
>>> +
>>> +	mdev_bus_compat_class = class_compat_register("mdev_bus");
>>> +	if (!mdev_bus_compat_class) {
>>> +		mdev_bus_unregister();
>>> +		return -ENOMEM;
>>> +	}
>>> +
>>> +	/*
>>> +	 * Attempt to load known vfio_mdev.  This gives us a working environment
>>> +	 * without the user needing to explicitly load vfio_mdev driver.
>>> +	 */
>>> +	request_module_nowait("vfio_mdev");
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +static void __exit mdev_exit(void)
>>> +{
>>> +	class_compat_unregister(mdev_bus_compat_class);
>>> +	mdev_bus_unregister();
>>> +}
>>> +
>>> +module_init(mdev_init)
>>> +module_exit(mdev_exit)  
>>
>> Hi Kirti,
>>
>> There is a possible issue: mdev_bus_register is called from mdev_init,
>> a module_init, equal to device_initcall if builtin to vmlinux; however,
>> the vendor driver, say i915.ko for intel case, have to call
>> mdev_register_device from its module_init: at that time, mdev_init
>> is still not called.
>>
>> Not sure if this issue exists with nvidia.ko. Though in most cases we
>> are expecting users select mdev as a standalone module, we still won't
>> break builtin case.
>>
>>
>> Hi Alex, do you have any suggestion here?
> 
> To fully solve the problem of built-in drivers making use of the mdev
> infrastructure we'd need to make mdev itself builtin and possibly a
> subsystem that is initialized prior to device drivers.  Is that really
> necessary?  Even though i915.ko is often loaded as part of an
> initramfs, most systems still build it as a module.  I would expect
> that standard module dependencies will pull in the necessary mdev and
> vfio modules to make this work correctly.  I can't say that I'm
> prepared to make mdev be a subsystem as would be necessary for builtin
> drivers to make use of.

Hi Alex,

I'm sorry to say that my previous understanding is not fully correct.
Current combination of mdev and i915 are prone to panic the system
as long as both built into vmlinux.

mdev_init:

	mdev_bus_register();
	mdev_bus_compat_class = class_compat_register("mdev_bus");
	request_module_nowait("vfio_mdev");

mdev_register_device:

	class_compat_create_link(mdev_bus_compat_class, dev, NULL);


If both mdev and i915 are builtin, the class_compat_create_link call
will simply panic the system. People having such .config will be annoyed,
for example, when he tries to bisect.

I'm not arguing that mdev should be a subsys here, but there still
be a possible way out. Adding the class_compat_register into
mdev_register_device if not yet registered, will help out. Maybe
it isn't the typical location to register a class, nonetheless it
fix the problem here with least impact.

Looking forward to your opinion :)

--
Thanks,
Jike

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index da6e2ce77495..23eced02aaf6 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -48,4 +48,5 @@  menuconfig VFIO_NOIOMMU
 
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
+source "drivers/vfio/mdev/Kconfig"
 source "virt/lib/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 7b8a31f63fea..4a23c13b6be4 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -7,3 +7,4 @@  obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
 obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
 obj-$(CONFIG_VFIO_PLATFORM) += platform/
+obj-$(CONFIG_VFIO_MDEV) += mdev/
diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
new file mode 100644
index 000000000000..93addace9a67
--- /dev/null
+++ b/drivers/vfio/mdev/Kconfig
@@ -0,0 +1,11 @@ 
+
+config VFIO_MDEV
+    tristate "Mediated device driver framework"
+    depends on VFIO
+    default n
+    help
+        Provides a framework to virtualize devices which don't have SR_IOV
+	capability built-in.
+	See Documentation/vfio-mdev/vfio-mediated-device.txt for more details.
+
+        If you don't know what do here, say N.
diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
new file mode 100644
index 000000000000..31bc04801d94
--- /dev/null
+++ b/drivers/vfio/mdev/Makefile
@@ -0,0 +1,4 @@ 
+
+mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
+
+obj-$(CONFIG_VFIO_MDEV) += mdev.o
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
new file mode 100644
index 000000000000..7db5ec164aeb
--- /dev/null
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -0,0 +1,372 @@ 
+/*
+ * Mediated device Core Driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/sysfs.h>
+#include <linux/mdev.h>
+
+#include "mdev_private.h"
+
+#define DRIVER_VERSION		"0.1"
+#define DRIVER_AUTHOR		"NVIDIA Corporation"
+#define DRIVER_DESC		"Mediated device Core Driver"
+
+static LIST_HEAD(parent_list);
+static DEFINE_MUTEX(parent_list_lock);
+static struct class_compat *mdev_bus_compat_class;
+
+static int _find_mdev_device(struct device *dev, void *data)
+{
+	struct mdev_device *mdev;
+
+	if (!dev_is_mdev(dev))
+		return 0;
+
+	mdev = to_mdev_device(dev);
+
+	if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
+		return 1;
+
+	return 0;
+}
+
+static struct mdev_device *__find_mdev_device(struct parent_device *parent,
+					      uuid_le uuid)
+{
+	struct device *dev;
+
+	dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
+	if (!dev)
+		return NULL;
+
+	put_device(dev);
+
+	return to_mdev_device(dev);
+}
+
+/* Should be called holding parent_list_lock */
+static struct parent_device *__find_parent_device(struct device *dev)
+{
+	struct parent_device *parent;
+
+	list_for_each_entry(parent, &parent_list, next) {
+		if (parent->dev == dev)
+			return parent;
+	}
+	return NULL;
+}
+
+static void mdev_release_parent(struct kref *kref)
+{
+	struct parent_device *parent = container_of(kref, struct parent_device,
+						    ref);
+	struct device *dev = parent->dev;
+
+	kfree(parent);
+	put_device(dev);
+}
+
+static
+inline struct parent_device *mdev_get_parent(struct parent_device *parent)
+{
+	if (parent)
+		kref_get(&parent->ref);
+
+	return parent;
+}
+
+static inline void mdev_put_parent(struct parent_device *parent)
+{
+	if (parent)
+		kref_put(&parent->ref, mdev_release_parent);
+}
+
+static int mdev_device_create_ops(struct kobject *kobj,
+				  struct mdev_device *mdev)
+{
+	struct parent_device *parent = mdev->parent;
+	int ret;
+
+	ret = parent->ops->create(kobj, mdev);
+	if (ret)
+		return ret;
+
+	ret = sysfs_create_groups(&mdev->dev.kobj,
+				  parent->ops->mdev_attr_groups);
+	if (ret)
+		parent->ops->remove(mdev);
+
+	return ret;
+}
+
+static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
+{
+	struct parent_device *parent = mdev->parent;
+	int ret;
+
+	/*
+	 * Vendor driver can return error if VMM or userspace application is
+	 * using this mdev device.
+	 */
+	ret = parent->ops->remove(mdev);
+	if (ret && !force_remove)
+		return -EBUSY;
+
+	sysfs_remove_groups(&mdev->dev.kobj, parent->ops->mdev_attr_groups);
+	return 0;
+}
+
+static int mdev_device_remove_cb(struct device *dev, void *data)
+{
+	return mdev_device_remove(dev, data ? *(bool *)data : true);
+}
+
+/*
+ * mdev_register_device : Register a device
+ * @dev: device structure representing parent device.
+ * @ops: Parent device operation structure to be registered.
+ *
+ * Add device to list of registered parent devices.
+ * Returns a negative value on error, otherwise 0.
+ */
+int mdev_register_device(struct device *dev, const struct parent_ops *ops)
+{
+	int ret = 0;
+	struct parent_device *parent;
+
+	/* check for mandatory ops */
+	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
+		return -EINVAL;
+
+	dev = get_device(dev);
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&parent_list_lock);
+
+	/* Check for duplicate */
+	parent = __find_parent_device(dev);
+	if (parent) {
+		ret = -EEXIST;
+		goto add_dev_err;
+	}
+
+	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+	if (!parent) {
+		ret = -ENOMEM;
+		goto add_dev_err;
+	}
+
+	kref_init(&parent->ref);
+
+	parent->dev = dev;
+	parent->ops = ops;
+
+	ret = parent_create_sysfs_files(parent);
+	if (ret) {
+		mutex_unlock(&parent_list_lock);
+		mdev_put_parent(parent);
+		return ret;
+	}
+
+	ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
+	if (ret)
+		dev_warn(dev, "Failed to create compatibility class link\n");
+
+	list_add(&parent->next, &parent_list);
+	mutex_unlock(&parent_list_lock);
+
+	dev_info(dev, "MDEV: Registered\n");
+	return 0;
+
+add_dev_err:
+	mutex_unlock(&parent_list_lock);
+	put_device(dev);
+	return ret;
+}
+EXPORT_SYMBOL(mdev_register_device);
+
+/*
+ * mdev_unregister_device : Unregister a parent device
+ * @dev: device structure representing parent device.
+ *
+ * Remove device from list of registered parent devices. Give a chance to free
+ * existing mediated devices for given device.
+ */
+
+void mdev_unregister_device(struct device *dev)
+{
+	struct parent_device *parent;
+	bool force_remove = true;
+
+	mutex_lock(&parent_list_lock);
+	parent = __find_parent_device(dev);
+
+	if (!parent) {
+		mutex_unlock(&parent_list_lock);
+		return;
+	}
+	dev_info(dev, "MDEV: Unregistering\n");
+
+	/*
+	 * Remove parent from the list and remove "mdev_supported_types"
+	 * sysfs files so that no new mediated device could be
+	 * created for this parent
+	 */
+	list_del(&parent->next);
+	parent_remove_sysfs_files(parent);
+
+	mutex_unlock(&parent_list_lock);
+
+	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
+
+	device_for_each_child(dev, (void *)&force_remove,
+			      mdev_device_remove_cb);
+	mdev_put_parent(parent);
+}
+EXPORT_SYMBOL(mdev_unregister_device);
+
+static void mdev_device_release(struct device *dev)
+{
+	struct mdev_device *mdev = to_mdev_device(dev);
+
+	dev_dbg(&mdev->dev, "MDEV: destroying\n");
+	kfree(mdev);
+}
+
+int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
+{
+	int ret;
+	struct mdev_device *mdev;
+	struct parent_device *parent;
+	struct mdev_type *type = to_mdev_type(kobj);
+
+	parent = mdev_get_parent(type->parent);
+	if (!parent)
+		return -EINVAL;
+
+	/* Check for duplicate */
+	mdev = __find_mdev_device(parent, uuid);
+	if (mdev) {
+		ret = -EEXIST;
+		goto create_err;
+	}
+
+	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+	if (!mdev) {
+		ret = -ENOMEM;
+		goto create_err;
+	}
+
+	memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
+	mdev->parent = parent;
+	kref_init(&mdev->ref);
+
+	mdev->dev.parent  = dev;
+	mdev->dev.bus     = &mdev_bus_type;
+	mdev->dev.release = mdev_device_release;
+	dev_set_name(&mdev->dev, "%pUl", uuid.b);
+
+	ret = device_register(&mdev->dev);
+	if (ret) {
+		put_device(&mdev->dev);
+		goto create_err;
+	}
+
+	ret = mdev_device_create_ops(kobj, mdev);
+	if (ret)
+		goto create_failed;
+
+	ret = mdev_create_sysfs_files(&mdev->dev, type);
+	if (ret) {
+		mdev_device_remove_ops(mdev, true);
+		goto create_failed;
+	}
+
+	mdev->type_kobj = kobj;
+	dev_dbg(&mdev->dev, "MDEV: created\n");
+
+	return ret;
+
+create_failed:
+	device_unregister(&mdev->dev);
+
+create_err:
+	mdev_put_parent(parent);
+	return ret;
+}
+
+int mdev_device_remove(struct device *dev, bool force_remove)
+{
+	struct mdev_device *mdev;
+	struct parent_device *parent;
+	struct mdev_type *type;
+	int ret = 0;
+
+	if (!dev_is_mdev(dev))
+		return 0;
+
+	mdev = to_mdev_device(dev);
+	parent = mdev->parent;
+	type = to_mdev_type(mdev->type_kobj);
+
+	ret = mdev_device_remove_ops(mdev, force_remove);
+	if (ret)
+		return ret;
+
+	mdev_remove_sysfs_files(dev, type);
+	device_unregister(dev);
+	mdev_put_parent(parent);
+	return ret;
+}
+
+static int __init mdev_init(void)
+{
+	int ret;
+
+	ret = mdev_bus_register();
+	if (ret) {
+		pr_err("Failed to register mdev bus\n");
+		return ret;
+	}
+
+	mdev_bus_compat_class = class_compat_register("mdev_bus");
+	if (!mdev_bus_compat_class) {
+		mdev_bus_unregister();
+		return -ENOMEM;
+	}
+
+	/*
+	 * Attempt to load known vfio_mdev.  This gives us a working environment
+	 * without the user needing to explicitly load vfio_mdev driver.
+	 */
+	request_module_nowait("vfio_mdev");
+
+	return ret;
+}
+
+static void __exit mdev_exit(void)
+{
+	class_compat_unregister(mdev_bus_compat_class);
+	mdev_bus_unregister();
+}
+
+module_init(mdev_init)
+module_exit(mdev_exit)
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
new file mode 100644
index 000000000000..7768ef87f528
--- /dev/null
+++ b/drivers/vfio/mdev/mdev_driver.c
@@ -0,0 +1,128 @@ 
+/*
+ * MDEV driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/mdev.h>
+
+#include "mdev_private.h"
+
+static int mdev_attach_iommu(struct mdev_device *mdev)
+{
+	int ret;
+	struct iommu_group *group;
+
+	group = iommu_group_alloc();
+	if (IS_ERR(group)) {
+		dev_err(&mdev->dev, "MDEV: failed to allocate group!\n");
+		return PTR_ERR(group);
+	}
+
+	ret = iommu_group_add_device(group, &mdev->dev);
+	if (ret) {
+		dev_err(&mdev->dev, "MDEV: failed to add dev to group!\n");
+		goto attach_fail;
+	}
+
+	dev_info(&mdev->dev, "MDEV: group_id = %d\n",
+				 iommu_group_id(group));
+attach_fail:
+	iommu_group_put(group);
+	return ret;
+}
+
+static void mdev_detach_iommu(struct mdev_device *mdev)
+{
+	iommu_group_remove_device(&mdev->dev);
+	dev_info(&mdev->dev, "MDEV: detaching iommu\n");
+}
+
+static int mdev_probe(struct device *dev)
+{
+	struct mdev_driver *drv = to_mdev_driver(dev->driver);
+	struct mdev_device *mdev = to_mdev_device(dev);
+	int ret;
+
+	ret = mdev_attach_iommu(mdev);
+	if (ret) {
+		dev_err(dev, "Failed to attach IOMMU\n");
+		return ret;
+	}
+
+	if (drv && drv->probe)
+		ret = drv->probe(dev);
+
+	if (ret)
+		mdev_detach_iommu(mdev);
+
+	return ret;
+}
+
+static int mdev_remove(struct device *dev)
+{
+	struct mdev_driver *drv = to_mdev_driver(dev->driver);
+	struct mdev_device *mdev = to_mdev_device(dev);
+
+	if (drv && drv->remove)
+		drv->remove(dev);
+
+	mdev_detach_iommu(mdev);
+
+	return 0;
+}
+
+struct bus_type mdev_bus_type = {
+	.name		= "mdev",
+	.probe		= mdev_probe,
+	.remove		= mdev_remove,
+};
+EXPORT_SYMBOL_GPL(mdev_bus_type);
+
+/*
+ * mdev_register_driver - register a new MDEV driver
+ * @drv: the driver to register
+ * @owner: module owner of driver to be registered
+ *
+ * Returns a negative value on error, otherwise 0.
+ */
+int mdev_register_driver(struct mdev_driver *drv, struct module *owner)
+{
+	/* initialize common driver fields */
+	drv->driver.name = drv->name;
+	drv->driver.bus = &mdev_bus_type;
+	drv->driver.owner = owner;
+
+	/* register with core */
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(mdev_register_driver);
+
+/*
+ * mdev_unregister_driver - unregister MDEV driver
+ * @drv: the driver to unregister
+ *
+ */
+void mdev_unregister_driver(struct mdev_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(mdev_unregister_driver);
+
+int mdev_bus_register(void)
+{
+	return bus_register(&mdev_bus_type);
+}
+
+void mdev_bus_unregister(void)
+{
+	bus_unregister(&mdev_bus_type);
+}
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
new file mode 100644
index 000000000000..000c93fcfdbd
--- /dev/null
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -0,0 +1,41 @@ 
+/*
+ * Mediated device interal definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MDEV_PRIVATE_H
+#define MDEV_PRIVATE_H
+
+int  mdev_bus_register(void);
+void mdev_bus_unregister(void);
+
+struct mdev_type {
+	struct kobject kobj;
+	struct kobject *devices_kobj;
+	struct parent_device *parent;
+	struct list_head next;
+	struct attribute_group *group;
+};
+
+#define to_mdev_type_attr(_attr)	\
+	container_of(_attr, struct mdev_type_attribute, attr)
+#define to_mdev_type(_kobj)		\
+	container_of(_kobj, struct mdev_type, kobj)
+
+int  parent_create_sysfs_files(struct parent_device *parent);
+void parent_remove_sysfs_files(struct parent_device *parent);
+
+int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type);
+void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
+
+int  mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid);
+int  mdev_device_remove(struct device *dev, bool force_remove);
+
+#endif /* MDEV_PRIVATE_H */
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
new file mode 100644
index 000000000000..426e35cf79d0
--- /dev/null
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -0,0 +1,296 @@ 
+/*
+ * File attributes for Mediated devices
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/mdev.h>
+
+#include "mdev_private.h"
+
+/* Static functions */
+
+static ssize_t mdev_type_attr_show(struct kobject *kobj,
+				     struct attribute *__attr, char *buf)
+{
+	struct mdev_type_attribute *attr = to_mdev_type_attr(__attr);
+	struct mdev_type *type = to_mdev_type(kobj);
+	ssize_t ret = -EIO;
+
+	if (attr->show)
+		ret = attr->show(kobj, type->parent->dev, buf);
+	return ret;
+}
+
+static ssize_t mdev_type_attr_store(struct kobject *kobj,
+				      struct attribute *__attr,
+				      const char *buf, size_t count)
+{
+	struct mdev_type_attribute *attr = to_mdev_type_attr(__attr);
+	struct mdev_type *type = to_mdev_type(kobj);
+	ssize_t ret = -EIO;
+
+	if (attr->store)
+		ret = attr->store(&type->kobj, type->parent->dev, buf, count);
+	return ret;
+}
+
+static const struct sysfs_ops mdev_type_sysfs_ops = {
+	.show = mdev_type_attr_show,
+	.store = mdev_type_attr_store,
+};
+
+static ssize_t create_store(struct kobject *kobj, struct device *dev,
+			    const char *buf, size_t count)
+{
+	char *str;
+	uuid_le uuid;
+	int ret;
+
+	if (count < UUID_STRING_LEN)
+		return -EINVAL;
+
+	str = kstrndup(buf, count, GFP_KERNEL);
+	if (!str)
+		return -ENOMEM;
+
+	ret = uuid_le_to_bin(str, &uuid);
+	if (!ret) {
+
+		ret = mdev_device_create(kobj, dev, uuid);
+		if (ret)
+			pr_err("mdev_create: Failed to create mdev device\n");
+		else
+			ret = count;
+	}
+
+	kfree(str);
+	return ret;
+}
+
+MDEV_TYPE_ATTR_WO(create);
+
+static void mdev_type_release(struct kobject *kobj)
+{
+	struct mdev_type *type = to_mdev_type(kobj);
+
+	pr_debug("Releasing group %s\n", kobj->name);
+	kfree(type);
+}
+
+static struct kobj_type mdev_type_ktype = {
+	.sysfs_ops = &mdev_type_sysfs_ops,
+	.release = mdev_type_release,
+};
+
+struct mdev_type *add_mdev_supported_type(struct parent_device *parent,
+					  struct attribute_group *group)
+{
+	struct mdev_type *type;
+	int ret;
+
+	if (!group->name) {
+		pr_err("%s: Type name empty!\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+
+	type = kzalloc(sizeof(*type), GFP_KERNEL);
+	if (!type)
+		return ERR_PTR(-ENOMEM);
+
+	type->kobj.kset = parent->mdev_types_kset;
+
+	ret = kobject_init_and_add(&type->kobj, &mdev_type_ktype, NULL,
+				   "%s-%s", dev_driver_string(parent->dev),
+				   group->name);
+	if (ret) {
+		kfree(type);
+		return ERR_PTR(ret);
+	}
+
+	ret = sysfs_create_file(&type->kobj, &mdev_type_attr_create.attr);
+	if (ret)
+		goto attr_create_failed;
+
+	type->devices_kobj = kobject_create_and_add("devices", &type->kobj);
+	if (!type->devices_kobj) {
+		ret = -ENOMEM;
+		goto attr_devices_failed;
+	}
+
+	ret = sysfs_create_files(&type->kobj,
+				 (const struct attribute **)group->attrs);
+	if (ret) {
+		ret = -ENOMEM;
+		goto attrs_failed;
+	}
+
+	type->group = group;
+	type->parent = parent;
+	return type;
+
+attrs_failed:
+	kobject_put(type->devices_kobj);
+attr_devices_failed:
+	sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
+attr_create_failed:
+	kobject_del(&type->kobj);
+	kobject_put(&type->kobj);
+	return ERR_PTR(ret);
+}
+
+static void remove_mdev_supported_type(struct mdev_type *type)
+{
+	sysfs_remove_files(&type->kobj,
+			   (const struct attribute **)type->group->attrs);
+	kobject_put(type->devices_kobj);
+	sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
+	kobject_del(&type->kobj);
+	kobject_put(&type->kobj);
+}
+
+static int add_mdev_supported_type_groups(struct parent_device *parent)
+{
+	int i;
+
+	for (i = 0; parent->ops->supported_type_groups[i]; i++) {
+		struct mdev_type *type;
+
+		type = add_mdev_supported_type(parent,
+					parent->ops->supported_type_groups[i]);
+		if (IS_ERR(type)) {
+			struct mdev_type *ltype, *tmp;
+
+			list_for_each_entry_safe(ltype, tmp, &parent->type_list,
+						  next) {
+				list_del(&ltype->next);
+				remove_mdev_supported_type(ltype);
+			}
+			return PTR_ERR(type);
+		}
+		list_add(&type->next, &parent->type_list);
+	}
+	return 0;
+}
+
+/* mdev sysfs Functions */
+
+void parent_remove_sysfs_files(struct parent_device *parent)
+{
+	struct mdev_type *type, *tmp;
+
+	list_for_each_entry_safe(type, tmp, &parent->type_list, next) {
+		list_del(&type->next);
+		remove_mdev_supported_type(type);
+	}
+
+	sysfs_remove_groups(&parent->dev->kobj, parent->ops->dev_attr_groups);
+	kset_unregister(parent->mdev_types_kset);
+}
+
+int parent_create_sysfs_files(struct parent_device *parent)
+{
+	int ret;
+
+	parent->mdev_types_kset = kset_create_and_add("mdev_supported_types",
+					       NULL, &parent->dev->kobj);
+
+	if (!parent->mdev_types_kset)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&parent->type_list);
+
+	ret = sysfs_create_groups(&parent->dev->kobj,
+				  parent->ops->dev_attr_groups);
+	if (ret)
+		goto create_err;
+
+	ret = add_mdev_supported_type_groups(parent);
+	if (ret)
+		sysfs_remove_groups(&parent->dev->kobj,
+				    parent->ops->dev_attr_groups);
+	else
+		return ret;
+
+create_err:
+	kset_unregister(parent->mdev_types_kset);
+	return ret;
+}
+
+static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val && device_remove_file_self(dev, attr)) {
+		int ret;
+
+		ret = mdev_device_remove(dev, false);
+		if (ret) {
+			device_create_file(dev, attr);
+			return ret;
+		}
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR_WO(remove);
+
+static const struct attribute *mdev_device_attrs[] = {
+	&dev_attr_remove.attr,
+	NULL,
+};
+
+int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
+{
+	int ret;
+
+	ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
+	if (ret) {
+		pr_err("Failed to create remove sysfs entry\n");
+		return ret;
+	}
+
+	ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
+	if (ret) {
+		pr_err("Failed to create symlink in types\n");
+		goto device_link_failed;
+	}
+
+	ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
+	if (ret) {
+		pr_err("Failed to create symlink in device directory\n");
+		goto type_link_failed;
+	}
+
+	return ret;
+
+type_link_failed:
+	sysfs_remove_link(type->devices_kobj, dev_name(dev));
+device_link_failed:
+	sysfs_remove_files(&dev->kobj, mdev_device_attrs);
+	return ret;
+}
+
+void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type)
+{
+	sysfs_remove_link(&dev->kobj, "mdev_type");
+	sysfs_remove_link(type->devices_kobj, dev_name(dev));
+	sysfs_remove_files(&dev->kobj, mdev_device_attrs);
+
+}
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
new file mode 100644
index 000000000000..727209b2a67f
--- /dev/null
+++ b/include/linux/mdev.h
@@ -0,0 +1,177 @@ 
+/*
+ * Mediated device definition
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@nvidia.com>
+ *	       Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MDEV_H
+#define MDEV_H
+
+#include <uapi/linux/vfio.h>
+
+struct parent_device;
+
+/* Mediated device */
+struct mdev_device {
+	struct device		dev;
+	struct parent_device	*parent;
+	uuid_le			uuid;
+	void			*driver_data;
+
+	/* internal only */
+	struct kref		ref;
+	struct list_head	next;
+	struct kobject		*type_kobj;
+};
+
+
+/**
+ * struct parent_ops - Structure to be registered for each parent device to
+ * register the device to mdev module.
+ *
+ * @owner:		The module owner.
+ * @dev_attr_groups:	Attributes of the parent device.
+ * @mdev_attr_groups:	Attributes of the mediated device.
+ * @supported_type_groups: Attributes to define supported types. It is mandatory
+ *			to provide supported types.
+ * @create:		Called to allocate basic resources in parent device's
+ *			driver for a particular mediated device. It is
+ *			mandatory to provide create ops.
+ *			@kobj: kobject of type for which 'create' is called.
+ *			@mdev: mdev_device structure on of mediated device
+ *			      that is being created
+ *			Returns integer: success (0) or error (< 0)
+ * @remove:		Called to free resources in parent device's driver for a
+ *			a mediated device. It is mandatory to provide 'remove'
+ *			ops.
+ *			@mdev: mdev_device device structure which is being
+ *			       destroyed
+ *			Returns integer: success (0) or error (< 0)
+ * @open:		Open mediated device.
+ *			@mdev: mediated device.
+ *			Returns integer: success (0) or error (< 0)
+ * @release:		release mediated device
+ *			@mdev: mediated device.
+ * @read:		Read emulation callback
+ *			@mdev: mediated device structure
+ *			@buf: read buffer
+ *			@count: number of bytes to read
+ *			@ppos: address.
+ *			Retuns number on bytes read on success or error.
+ * @write:		Write emulation callback
+ *			@mdev: mediated device structure
+ *			@buf: write buffer
+ *			@count: number of bytes to be written
+ *			@ppos: address.
+ *			Retuns number on bytes written on success or error.
+ * @ioctl:		IOCTL callback
+ *			@mdev: mediated device structure
+ *			@cmd: mediated device structure
+ *			@arg: mediated device structure
+ * @mmap:		mmap callback
+ * Parent device that support mediated device should be registered with mdev
+ * module with parent_ops structure.
+ */
+
+struct parent_ops {
+	struct module   *owner;
+	const struct attribute_group **dev_attr_groups;
+	const struct attribute_group **mdev_attr_groups;
+	struct attribute_group **supported_type_groups;
+
+	int     (*create)(struct kobject *kobj, struct mdev_device *mdev);
+	int     (*remove)(struct mdev_device *mdev);
+	int     (*open)(struct mdev_device *mdev);
+	void    (*release)(struct mdev_device *mdev);
+	ssize_t (*read)(struct mdev_device *mdev, char __user *buf,
+			size_t count, loff_t *ppos);
+	ssize_t (*write)(struct mdev_device *mdev, const char __user *buf,
+			 size_t count, loff_t *ppos);
+	ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd,
+			 unsigned long arg);
+	int	(*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma);
+};
+
+/* Parent Device */
+struct parent_device {
+	struct device		*dev;
+	const struct parent_ops	*ops;
+
+	/* internal */
+	struct kref		ref;
+	struct list_head	next;
+	struct kset *mdev_types_kset;
+	struct list_head	type_list;
+};
+
+/* interface for exporting mdev supported type attributes */
+struct mdev_type_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct kobject *kobj, struct device *dev, char *buf);
+	ssize_t (*store)(struct kobject *kobj, struct device *dev,
+			 const char *buf, size_t count);
+};
+
+#define MDEV_TYPE_ATTR(_name, _mode, _show, _store)		\
+struct mdev_type_attribute mdev_type_attr_##_name =		\
+	__ATTR(_name, _mode, _show, _store)
+#define MDEV_TYPE_ATTR_RW(_name) \
+	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RW(_name)
+#define MDEV_TYPE_ATTR_RO(_name) \
+	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name)
+#define MDEV_TYPE_ATTR_WO(_name) \
+	struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name)
+
+/**
+ * struct mdev_driver - Mediated device driver
+ * @name: driver name
+ * @probe: called when new device created
+ * @remove: called when device removed
+ * @driver: device driver structure
+ *
+ **/
+struct mdev_driver {
+	const char *name;
+	int  (*probe)(struct device *dev);
+	void (*remove)(struct device *dev);
+	struct device_driver driver;
+};
+
+static inline struct mdev_driver *to_mdev_driver(struct device_driver *drv)
+{
+	return drv ? container_of(drv, struct mdev_driver, driver) : NULL;
+}
+
+static inline struct mdev_device *to_mdev_device(struct device *dev)
+{
+	return dev ? container_of(dev, struct mdev_device, dev) : NULL;
+}
+
+static inline void *mdev_get_drvdata(struct mdev_device *mdev)
+{
+	return mdev->driver_data;
+}
+
+static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data)
+{
+	mdev->driver_data = data;
+}
+
+extern struct bus_type mdev_bus_type;
+
+#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type)
+
+extern int  mdev_register_device(struct device *dev,
+				 const struct parent_ops *ops);
+extern void mdev_unregister_device(struct device *dev);
+
+extern int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
+extern void mdev_unregister_driver(struct mdev_driver *drv);
+
+#endif /* MDEV_H */