diff mbox

[RFC,1/3] VFIO: Add new cmd to receive eventfd from userspace to notify IOMMU fault event

Message ID 1487515629-13815-2-git-send-email-tianyu.lan@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

lan,Tianyu Feb. 19, 2017, 2:47 p.m. UTC
This patch is to receive IOMMU fault eventfd and IOMMU fault event flag
from userspace. VFIO should register IOMMU fault event handler according
fault event flag which designates what kind of fault events need to be
reported.  When VFIO get IOMMU fault event from IOMMU driver, it should
notify userspace via received fd.

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
---
 drivers/vfio/vfio_iommu_type1.c | 45 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/vfio.h       | 15 ++++++++++++++
 2 files changed, 60 insertions(+)

Comments

Alex Williamson Feb. 20, 2017, 8:53 p.m. UTC | #1
On Sun, 19 Feb 2017 22:47:07 +0800
Lan Tianyu <tianyu.lan@intel.com> wrote:

> This patch is to receive IOMMU fault eventfd and IOMMU fault event flag
> from userspace. VFIO should register IOMMU fault event handler according
> fault event flag which designates what kind of fault events need to be
> reported.  When VFIO get IOMMU fault event from IOMMU driver, it should
> notify userspace via received fd.
> 
> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 45 +++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/vfio.h       | 15 ++++++++++++++
>  2 files changed, 60 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index b3cc33f..46674ea 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -38,6 +38,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/mdev.h>
>  #include <linux/notifier.h>
> +#include <linux/eventfd.h>
>  
>  #define DRIVER_VERSION  "0.2"
>  #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
> @@ -61,6 +62,8 @@ struct vfio_iommu {
>  	struct mutex		lock;
>  	struct rb_root		dma_list;
>  	struct blocking_notifier_head notifier;
> +	struct eventfd_ctx	*iommu_fault_fd;
> +	struct mutex            fault_lock;
>  	bool			v2;
>  	bool			nesting;
>  };
> @@ -1452,6 +1455,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
>  	INIT_LIST_HEAD(&iommu->domain_list);
>  	iommu->dma_list = RB_ROOT;
>  	mutex_init(&iommu->lock);
> +	mutex_init(&iommu->fault_lock);
>  	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
>  
>  	return iommu;
> @@ -1582,6 +1586,47 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>  
>  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
>  			-EFAULT : 0;
> +	} else if (cmd == VFIO_IOMMU_SET_FAULT_EVENTFD) {
> +		struct vfio_iommu_type1_set_fault_eventfd eventfd;
> +		int fd;
> +		int ret;
> +
> +		minsz = offsetofend(struct vfio_iommu_type1_set_fault_eventfd,
> +				    fd);
> +
> +		if (copy_from_user(&eventfd, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		if (eventfd.argsz < minsz)
> +			return -EINVAL;
> +
> +
> +		mutex_lock(&iommu->fault_lock);
> +
> +		fd = eventfd.fd;
> +		if (fd == -1) {
> +			if (iommu->iommu_fault_fd)
> +				eventfd_ctx_put(iommu->iommu_fault_fd);
> +			iommu->iommu_fault_fd = NULL;
> +			ret = 0;
> +		} else if (fd >= 0) {
> +			struct eventfd_ctx *ctx;
> +
> +			ctx = eventfd_ctx_fdget(fd);
> +			if (IS_ERR(ctx))
> +				return PTR_ERR(ctx);
> +
> +			if (ctx)
> +				eventfd_ctx_put(ctx);
> +
> +			iommu->iommu_fault_fd = ctx;
> +			ret = 0;
> +		} else
> +			ret = -EINVAL;
> +
> +		mutex_unlock(&iommu->fault_lock);
> +
> +		return ret;
>  	}
>  
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 519eff3..8616334 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -547,6 +547,21 @@ struct vfio_iommu_type1_dma_unmap {
>  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
>  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
>  
> +/*
> + * VFIO_IOMMU_SET_FAULT_EVENT_FD	_IO(VFIO_TYPE, VFIO_BASE + 17)
> + *
> + * Receive eventfd from userspace to notify fault event from IOMMU.
> + */
> +struct vfio_iommu_type1_set_fault_eventfd {
> +	__u32	argsz;
> +	__u32   flags;
> +/* What IOMMU Fault events should be reported. */
> +#define VFIO_IOMMU_UR_FAULT_WITHOUT_PASID (1 << 0)

Is this a filter?  Would multiple filters be allowed using the same
eventfd or would each type require a separate eventfd?  How would a
user be able to probe what flags/filters are available?  Should this
simply use the IRQ_INFO and SET_IRQ ioctls from the device file
descriptor for consistency?

> +	__s32	fd;			/* Eventfd from user space */
> +};
> +
> +#define VFIO_IOMMU_SET_FAULT_EVENTFD	_IO(VFIO_TYPE, VFIO_BASE + 17)
> +
>  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>  
>  /*
lan,Tianyu Feb. 21, 2017, 5:29 a.m. UTC | #2
On 2017年02月21日 04:53, Alex Williamson wrote:
> On Sun, 19 Feb 2017 22:47:07 +0800
> Lan Tianyu <tianyu.lan@intel.com> wrote:
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index 519eff3..8616334 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -547,6 +547,21 @@ struct vfio_iommu_type1_dma_unmap {
>>  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
>>  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
>>  
>> +/*
>> + * VFIO_IOMMU_SET_FAULT_EVENT_FD	_IO(VFIO_TYPE, VFIO_BASE + 17)
>> + *
>> + * Receive eventfd from userspace to notify fault event from IOMMU.
>> + */
>> +struct vfio_iommu_type1_set_fault_eventfd {
>> +	__u32	argsz;
>> +	__u32   flags;
>> +/* What IOMMU Fault events should be reported. */
>> +#define VFIO_IOMMU_UR_FAULT_WITHOUT_PASID (1 << 0)
> 
> Is this a filter?  Would multiple filters be allowed using the same
> eventfd or would each type require a separate eventfd?  How would a
> user be able to probe what flags/filters are available?  Should this
> simply use the IRQ_INFO and SET_IRQ ioctls from the device file
> descriptor for consistency?

The filer should be in the IOMMU driver. VFIO driver needs to pass the
flag to IOMMU driver. The flag indicates what kind of fault events IOMMU
driver should report and is provided by userspace. Qemu has info what
kind of fault event it can handle. The new IOMMU driver API of
registering IOMMU fault notifier is still under design stage and so I
didn't use the flag in this patchset. The new VFIO fault event channel
also will affect the parameters of new IOMMU API and so I sent the
patchset first to get some conclusion before finalizing new IOMMU API.

Just like you mentioned "such an event might logically be connected to
the vfio container rather than the device" about IOMMU fault event in
the AER ERR mail(the discussion with Kevin), I put the IOMMU fault
evenfd with VFIO container rather than vfio-pci device.
Michael S. Tsirkin Feb. 21, 2017, 5:48 a.m. UTC | #3
On Sun, Feb 19, 2017 at 10:47:07PM +0800, Lan Tianyu wrote:
> This patch is to receive IOMMU fault eventfd and IOMMU fault event flag
> from userspace. VFIO should register IOMMU fault event handler according
> fault event flag which designates what kind of fault events need to be
> reported.  When VFIO get IOMMU fault event from IOMMU driver, it should
> notify userspace via received fd.
> 
> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 45 +++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/vfio.h       | 15 ++++++++++++++
>  2 files changed, 60 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index b3cc33f..46674ea 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -38,6 +38,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/mdev.h>
>  #include <linux/notifier.h>
> +#include <linux/eventfd.h>
>  
>  #define DRIVER_VERSION  "0.2"
>  #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
> @@ -61,6 +62,8 @@ struct vfio_iommu {
>  	struct mutex		lock;
>  	struct rb_root		dma_list;
>  	struct blocking_notifier_head notifier;
> +	struct eventfd_ctx	*iommu_fault_fd;
> +	struct mutex            fault_lock;
>  	bool			v2;
>  	bool			nesting;
>  };
> @@ -1452,6 +1455,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
>  	INIT_LIST_HEAD(&iommu->domain_list);
>  	iommu->dma_list = RB_ROOT;
>  	mutex_init(&iommu->lock);
> +	mutex_init(&iommu->fault_lock);
>  	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
>  
>  	return iommu;
> @@ -1582,6 +1586,47 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>  
>  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
>  			-EFAULT : 0;
> +	} else if (cmd == VFIO_IOMMU_SET_FAULT_EVENTFD) {
> +		struct vfio_iommu_type1_set_fault_eventfd eventfd;
> +		int fd;
> +		int ret;
> +
> +		minsz = offsetofend(struct vfio_iommu_type1_set_fault_eventfd,
> +				    fd);
> +
> +		if (copy_from_user(&eventfd, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		if (eventfd.argsz < minsz)
> +			return -EINVAL;
> +
> +
> +		mutex_lock(&iommu->fault_lock);
> +
> +		fd = eventfd.fd;
> +		if (fd == -1) {
> +			if (iommu->iommu_fault_fd)
> +				eventfd_ctx_put(iommu->iommu_fault_fd);
> +			iommu->iommu_fault_fd = NULL;
> +			ret = 0;
> +		} else if (fd >= 0) {
> +			struct eventfd_ctx *ctx;
> +
> +			ctx = eventfd_ctx_fdget(fd);
> +			if (IS_ERR(ctx))
> +				return PTR_ERR(ctx);
> +
> +			if (ctx)
> +				eventfd_ctx_put(ctx);
> +
> +			iommu->iommu_fault_fd = ctx;
> +			ret = 0;
> +		} else
> +			ret = -EINVAL;
> +
> +		mutex_unlock(&iommu->fault_lock);
> +
> +		return ret;
>  	}
>  
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 519eff3..8616334 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -547,6 +547,21 @@ struct vfio_iommu_type1_dma_unmap {
>  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
>  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
>  
> +/*
> + * VFIO_IOMMU_SET_FAULT_EVENT_FD	_IO(VFIO_TYPE, VFIO_BASE + 17)
> + *
> + * Receive eventfd from userspace to notify fault event from IOMMU.
> + */
> +struct vfio_iommu_type1_set_fault_eventfd {
> +	__u32	argsz;
> +	__u32   flags;
> +/* What IOMMU Fault events should be reported. */
> +#define VFIO_IOMMU_UR_FAULT_WITHOUT_PASID (1 << 0)
> +	__s32	fd;			/* Eventfd from user space */
> +};
> +
> +#define VFIO_IOMMU_SET_FAULT_EVENTFD	_IO(VFIO_TYPE, VFIO_BASE + 17)
> +
>  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */

How about simply doing

        VFIO_PCI_MSIX_IRQ_INDEX,
        VFIO_PCI_ERR_IRQ_INDEX,
        VFIO_PCI_REQ_IRQ_INDEX,
+       VFIO_PCI_IOMMU_UR_FAULT_WITHOUT_PASID_IRQ_INDEX,
        VFIO_PCI_NUM_IRQS


instead?

This way you don't need to write a whole new kernel/userspace
interface.

>  /*
> -- 
> 1.8.3.1
Alex Williamson Feb. 21, 2017, 6:05 a.m. UTC | #4
On Tue, 21 Feb 2017 07:48:27 +0200
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Sun, Feb 19, 2017 at 10:47:07PM +0800, Lan Tianyu wrote:
> > This patch is to receive IOMMU fault eventfd and IOMMU fault event flag
> > from userspace. VFIO should register IOMMU fault event handler according
> > fault event flag which designates what kind of fault events need to be
> > reported.  When VFIO get IOMMU fault event from IOMMU driver, it should
> > notify userspace via received fd.
> > 
> > Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 45 +++++++++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/vfio.h       | 15 ++++++++++++++
> >  2 files changed, 60 insertions(+)
> > 
> > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> > index b3cc33f..46674ea 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -38,6 +38,7 @@
> >  #include <linux/workqueue.h>
> >  #include <linux/mdev.h>
> >  #include <linux/notifier.h>
> > +#include <linux/eventfd.h>
> >  
> >  #define DRIVER_VERSION  "0.2"
> >  #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
> > @@ -61,6 +62,8 @@ struct vfio_iommu {
> >  	struct mutex		lock;
> >  	struct rb_root		dma_list;
> >  	struct blocking_notifier_head notifier;
> > +	struct eventfd_ctx	*iommu_fault_fd;
> > +	struct mutex            fault_lock;
> >  	bool			v2;
> >  	bool			nesting;
> >  };
> > @@ -1452,6 +1455,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
> >  	INIT_LIST_HEAD(&iommu->domain_list);
> >  	iommu->dma_list = RB_ROOT;
> >  	mutex_init(&iommu->lock);
> > +	mutex_init(&iommu->fault_lock);
> >  	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
> >  
> >  	return iommu;
> > @@ -1582,6 +1586,47 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> >  
> >  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
> >  			-EFAULT : 0;
> > +	} else if (cmd == VFIO_IOMMU_SET_FAULT_EVENTFD) {
> > +		struct vfio_iommu_type1_set_fault_eventfd eventfd;
> > +		int fd;
> > +		int ret;
> > +
> > +		minsz = offsetofend(struct vfio_iommu_type1_set_fault_eventfd,
> > +				    fd);
> > +
> > +		if (copy_from_user(&eventfd, (void __user *)arg, minsz))
> > +			return -EFAULT;
> > +
> > +		if (eventfd.argsz < minsz)
> > +			return -EINVAL;
> > +
> > +
> > +		mutex_lock(&iommu->fault_lock);
> > +
> > +		fd = eventfd.fd;
> > +		if (fd == -1) {
> > +			if (iommu->iommu_fault_fd)
> > +				eventfd_ctx_put(iommu->iommu_fault_fd);
> > +			iommu->iommu_fault_fd = NULL;
> > +			ret = 0;
> > +		} else if (fd >= 0) {
> > +			struct eventfd_ctx *ctx;
> > +
> > +			ctx = eventfd_ctx_fdget(fd);
> > +			if (IS_ERR(ctx))
> > +				return PTR_ERR(ctx);
> > +
> > +			if (ctx)
> > +				eventfd_ctx_put(ctx);
> > +
> > +			iommu->iommu_fault_fd = ctx;
> > +			ret = 0;
> > +		} else
> > +			ret = -EINVAL;
> > +
> > +		mutex_unlock(&iommu->fault_lock);
> > +
> > +		return ret;
> >  	}
> >  
> >  	return -ENOTTY;
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 519eff3..8616334 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -547,6 +547,21 @@ struct vfio_iommu_type1_dma_unmap {
> >  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
> >  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
> >  
> > +/*
> > + * VFIO_IOMMU_SET_FAULT_EVENT_FD	_IO(VFIO_TYPE, VFIO_BASE + 17)
> > + *
> > + * Receive eventfd from userspace to notify fault event from IOMMU.
> > + */
> > +struct vfio_iommu_type1_set_fault_eventfd {
> > +	__u32	argsz;
> > +	__u32   flags;
> > +/* What IOMMU Fault events should be reported. */
> > +#define VFIO_IOMMU_UR_FAULT_WITHOUT_PASID (1 << 0)
> > +	__s32	fd;			/* Eventfd from user space */
> > +};
> > +
> > +#define VFIO_IOMMU_SET_FAULT_EVENTFD	_IO(VFIO_TYPE, VFIO_BASE + 17)
> > +
> >  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */  
> 
> How about simply doing
> 
>         VFIO_PCI_MSIX_IRQ_INDEX,
>         VFIO_PCI_ERR_IRQ_INDEX,
>         VFIO_PCI_REQ_IRQ_INDEX,
> +       VFIO_PCI_IOMMU_UR_FAULT_WITHOUT_PASID_IRQ_INDEX,
>         VFIO_PCI_NUM_IRQS
> 
> 
> instead?
> 
> This way you don't need to write a whole new kernel/userspace
> interface.

It seems there are pros and cons either way, but this is fundamentally
different, making the iommu fault exposed on the device rather than the
container.  The container represents the iommu state, so it makes some
sense to report the fault for the container.  On the other hand if
it's reported per device there's no matching of source ID to a
device.  It certainly affects how the iommu fault reporting is designed,
whether it's a callback added to the device driver or a component of
the IOMMU API.  Thanks,

Alex
Yi Liu Feb. 21, 2017, 6:11 a.m. UTC | #5
> -----Original Message-----
> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> Sent: Tuesday, February 21, 2017 1:48 PM
> To: Lan, Tianyu <tianyu.lan@intel.com>
> Cc: kvm@vger.kernel.org; Tian, Kevin <kevin.tian@intel.com>;
> jan.kiszka@siemens.com; jasowang@redhat.com; peterx@redhat.com;
> david@gibson.dropbear.id.au; alex.williamson@redhat.com; Liu, Yi L
> <yi.l.liu@intel.com>
> Subject: Re: [RFC PATCH 1/3] VFIO: Add new cmd to receive eventfd from
> userspace to notify IOMMU fault event
> 
> On Sun, Feb 19, 2017 at 10:47:07PM +0800, Lan Tianyu wrote:
> > This patch is to receive IOMMU fault eventfd and IOMMU fault event
> > flag from userspace. VFIO should register IOMMU fault event handler
> > according fault event flag which designates what kind of fault events
> > need to be reported.  When VFIO get IOMMU fault event from IOMMU
> > driver, it should notify userspace via received fd.
> >
> > Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 45
> +++++++++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/vfio.h       | 15 ++++++++++++++
> >  2 files changed, 60 insertions(+)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c
> > b/drivers/vfio/vfio_iommu_type1.c index b3cc33f..46674ea 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -38,6 +38,7 @@
> >  #include <linux/workqueue.h>
> >  #include <linux/mdev.h>
> >  #include <linux/notifier.h>
> > +#include <linux/eventfd.h>
> >
> >  #define DRIVER_VERSION  "0.2"
> >  #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
> > @@ -61,6 +62,8 @@ struct vfio_iommu {
> >  	struct mutex		lock;
> >  	struct rb_root		dma_list;
> >  	struct blocking_notifier_head notifier;
> > +	struct eventfd_ctx	*iommu_fault_fd;
> > +	struct mutex            fault_lock;
> >  	bool			v2;
> >  	bool			nesting;
> >  };
> > @@ -1452,6 +1455,7 @@ static void *vfio_iommu_type1_open(unsigned
> long arg)
> >  	INIT_LIST_HEAD(&iommu->domain_list);
> >  	iommu->dma_list = RB_ROOT;
> >  	mutex_init(&iommu->lock);
> > +	mutex_init(&iommu->fault_lock);
> >  	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
> >
> >  	return iommu;
> > @@ -1582,6 +1586,47 @@ static long vfio_iommu_type1_ioctl(void
> > *iommu_data,
> >
> >  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
> >  			-EFAULT : 0;
> > +	} else if (cmd == VFIO_IOMMU_SET_FAULT_EVENTFD) {
> > +		struct vfio_iommu_type1_set_fault_eventfd eventfd;
> > +		int fd;
> > +		int ret;
> > +
> > +		minsz = offsetofend(struct
> vfio_iommu_type1_set_fault_eventfd,
> > +				    fd);
> > +
> > +		if (copy_from_user(&eventfd, (void __user *)arg, minsz))
> > +			return -EFAULT;
> > +
> > +		if (eventfd.argsz < minsz)
> > +			return -EINVAL;
> > +
> > +
> > +		mutex_lock(&iommu->fault_lock);
> > +
> > +		fd = eventfd.fd;
> > +		if (fd == -1) {
> > +			if (iommu->iommu_fault_fd)
> > +				eventfd_ctx_put(iommu->iommu_fault_fd);
> > +			iommu->iommu_fault_fd = NULL;
> > +			ret = 0;
> > +		} else if (fd >= 0) {
> > +			struct eventfd_ctx *ctx;
> > +
> > +			ctx = eventfd_ctx_fdget(fd);
> > +			if (IS_ERR(ctx))
> > +				return PTR_ERR(ctx);
> > +
> > +			if (ctx)
> > +				eventfd_ctx_put(ctx);
> > +
> > +			iommu->iommu_fault_fd = ctx;
> > +			ret = 0;
> > +		} else
> > +			ret = -EINVAL;
> > +
> > +		mutex_unlock(&iommu->fault_lock);
> > +
> > +		return ret;
> >  	}
> >
> >  	return -ENOTTY;
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 519eff3..8616334 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -547,6 +547,21 @@ struct vfio_iommu_type1_dma_unmap {
> >  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
> >  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
> >
> > +/*
> > + * VFIO_IOMMU_SET_FAULT_EVENT_FD	_IO(VFIO_TYPE, VFIO_BASE + 17)
> > + *
> > + * Receive eventfd from userspace to notify fault event from IOMMU.
> > + */
> > +struct vfio_iommu_type1_set_fault_eventfd {
> > +	__u32	argsz;
> > +	__u32   flags;
> > +/* What IOMMU Fault events should be reported. */ #define
> > +VFIO_IOMMU_UR_FAULT_WITHOUT_PASID (1 << 0)
> > +	__s32	fd;			/* Eventfd from user space */
> > +};
> > +
> > +#define VFIO_IOMMU_SET_FAULT_EVENTFD	_IO(VFIO_TYPE,
> VFIO_BASE + 17)
> > +
> >  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU
> > -------- */
> 
> How about simply doing
> 
>         VFIO_PCI_MSIX_IRQ_INDEX,
>         VFIO_PCI_ERR_IRQ_INDEX,
>         VFIO_PCI_REQ_IRQ_INDEX,
> +       VFIO_PCI_IOMMU_UR_FAULT_WITHOUT_PASID_IRQ_INDEX,
>         VFIO_PCI_NUM_IRQS
> 
> 
> instead?
> 
> This way you don't need to write a whole new kernel/userspace interface.

hmmm, it's a practical way. However, the IRQ framework is designed to be working
per-device. If we have multi-assigned device, it will introduce multi fault irq. Howevr,
the fault from pIOMMU should be per-IOMMU or at least per-domain. So I think we'd
better to have separate kernel/userspace interface although I admit the new interface
is similar with the one you suggested.

Thanks,
Yi L
diff mbox

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b3cc33f..46674ea 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -38,6 +38,7 @@ 
 #include <linux/workqueue.h>
 #include <linux/mdev.h>
 #include <linux/notifier.h>
+#include <linux/eventfd.h>
 
 #define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
@@ -61,6 +62,8 @@  struct vfio_iommu {
 	struct mutex		lock;
 	struct rb_root		dma_list;
 	struct blocking_notifier_head notifier;
+	struct eventfd_ctx	*iommu_fault_fd;
+	struct mutex            fault_lock;
 	bool			v2;
 	bool			nesting;
 };
@@ -1452,6 +1455,7 @@  static void *vfio_iommu_type1_open(unsigned long arg)
 	INIT_LIST_HEAD(&iommu->domain_list);
 	iommu->dma_list = RB_ROOT;
 	mutex_init(&iommu->lock);
+	mutex_init(&iommu->fault_lock);
 	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
 
 	return iommu;
@@ -1582,6 +1586,47 @@  static long vfio_iommu_type1_ioctl(void *iommu_data,
 
 		return copy_to_user((void __user *)arg, &unmap, minsz) ?
 			-EFAULT : 0;
+	} else if (cmd == VFIO_IOMMU_SET_FAULT_EVENTFD) {
+		struct vfio_iommu_type1_set_fault_eventfd eventfd;
+		int fd;
+		int ret;
+
+		minsz = offsetofend(struct vfio_iommu_type1_set_fault_eventfd,
+				    fd);
+
+		if (copy_from_user(&eventfd, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (eventfd.argsz < minsz)
+			return -EINVAL;
+
+
+		mutex_lock(&iommu->fault_lock);
+
+		fd = eventfd.fd;
+		if (fd == -1) {
+			if (iommu->iommu_fault_fd)
+				eventfd_ctx_put(iommu->iommu_fault_fd);
+			iommu->iommu_fault_fd = NULL;
+			ret = 0;
+		} else if (fd >= 0) {
+			struct eventfd_ctx *ctx;
+
+			ctx = eventfd_ctx_fdget(fd);
+			if (IS_ERR(ctx))
+				return PTR_ERR(ctx);
+
+			if (ctx)
+				eventfd_ctx_put(ctx);
+
+			iommu->iommu_fault_fd = ctx;
+			ret = 0;
+		} else
+			ret = -EINVAL;
+
+		mutex_unlock(&iommu->fault_lock);
+
+		return ret;
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 519eff3..8616334 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -547,6 +547,21 @@  struct vfio_iommu_type1_dma_unmap {
 #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
 #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
 
+/*
+ * VFIO_IOMMU_SET_FAULT_EVENT_FD	_IO(VFIO_TYPE, VFIO_BASE + 17)
+ *
+ * Receive eventfd from userspace to notify fault event from IOMMU.
+ */
+struct vfio_iommu_type1_set_fault_eventfd {
+	__u32	argsz;
+	__u32   flags;
+/* What IOMMU Fault events should be reported. */
+#define VFIO_IOMMU_UR_FAULT_WITHOUT_PASID (1 << 0)
+	__s32	fd;			/* Eventfd from user space */
+};
+
+#define VFIO_IOMMU_SET_FAULT_EVENTFD	_IO(VFIO_TYPE, VFIO_BASE + 17)
+
 /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
 
 /*