diff mbox series

[V6,vfio,05/10] vfio: Introduce the DMA logging feature support

Message ID 20220905105852.26398-6-yishaih@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Add device DMA logging support for mlx5 driver | expand

Commit Message

Yishai Hadas Sept. 5, 2022, 10:58 a.m. UTC
Introduce the DMA logging feature support in the vfio core layer.

It includes the processing of the device start/stop/report DMA logging
UAPIs and calling the relevant driver 'op' to do the work.

Specifically,
Upon start, the core translates the given input ranges into an interval
tree, checks for unexpected overlapping, non aligned ranges and then
pass the translated input to the driver for start tracking the given
ranges.

Upon report, the core translates the given input user space bitmap and
page size into an IOVA kernel bitmap iterator. Then it iterates it and
call the driver to set the corresponding bits for the dirtied pages in a
specific IOVA range.

Upon stop, the driver is called to stop the previous started tracking.

The next patches from the series will introduce the mlx5 driver
implementation for the logging ops.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 drivers/vfio/Kconfig             |   1 +
 drivers/vfio/pci/vfio_pci_core.c |   5 +
 drivers/vfio/vfio_main.c         | 175 +++++++++++++++++++++++++++++++
 include/linux/vfio.h             |  21 +++-
 4 files changed, 200 insertions(+), 2 deletions(-)

Comments

Alex Williamson Sept. 6, 2022, 10:41 p.m. UTC | #1
On Mon, 5 Sep 2022 13:58:47 +0300
Yishai Hadas <yishaih@nvidia.com> wrote:
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index e05ddc6fe6a5..b17f2f454389 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -108,6 +110,21 @@ struct vfio_migration_ops {
>  				   enum vfio_device_mig_state *curr_state);
>  };
>  
> +/**
> + * @log_start: Optional callback to ask the device start DMA logging.
> + * @log_stop: Optional callback to ask the device stop DMA logging.
> + * @log_read_and_clear: Optional callback to ask the device read
> + *         and clear the dirty DMAs in some given range.

I don't see anywhere in the core that we track the device state
relative to the DEVICE_FEATURE_DMA_LOGGING features, nor do we
explicitly put the responsibility on the driver implementation to
handle invalid user requests.  The mlx5 driver implementation appears
to do this, but maybe we should at least include a requirement here, ex.

   The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set
   of features does not track logging state relative to the device,
   therefore the device implementation of vfio_log_ops must handle
   arbitrary user requests.  This includes rejecting subsequent calls
   to log_start without an intervening log_stop, as well as graceful
   handling of log_stop and log_read_and_clear from invalid states.

With something like that.

Acked-by: Alex Williamson <alex.williamson@redhat.com>

You can also add my Ack on 3, 4, and (fwiw) 6-10 as I assume this would
be a PR from Leon.  Thanks,

Alex

> + */
> +struct vfio_log_ops {
> +	int (*log_start)(struct vfio_device *device,
> +		struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
> +	int (*log_stop)(struct vfio_device *device);
> +	int (*log_read_and_clear)(struct vfio_device *device,
> +		unsigned long iova, unsigned long length,
> +		struct iova_bitmap *dirty);
> +};
> +
>  /**
>   * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
>   * @flags: Arg from the device_feature op
Leon Romanovsky Sept. 7, 2022, 9:45 a.m. UTC | #2
On Tue, Sep 06, 2022 at 04:41:54PM -0600, Alex Williamson wrote:
> On Mon, 5 Sep 2022 13:58:47 +0300
> Yishai Hadas <yishaih@nvidia.com> wrote:
> > diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> > index e05ddc6fe6a5..b17f2f454389 100644
> > --- a/include/linux/vfio.h
> > +++ b/include/linux/vfio.h
> > @@ -108,6 +110,21 @@ struct vfio_migration_ops {
> >  				   enum vfio_device_mig_state *curr_state);
> >  };
> >  
> > +/**
> > + * @log_start: Optional callback to ask the device start DMA logging.
> > + * @log_stop: Optional callback to ask the device stop DMA logging.
> > + * @log_read_and_clear: Optional callback to ask the device read
> > + *         and clear the dirty DMAs in some given range.
> 
> I don't see anywhere in the core that we track the device state
> relative to the DEVICE_FEATURE_DMA_LOGGING features, nor do we
> explicitly put the responsibility on the driver implementation to
> handle invalid user requests.  The mlx5 driver implementation appears
> to do this, but maybe we should at least include a requirement here, ex.
> 
>    The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set
>    of features does not track logging state relative to the device,
>    therefore the device implementation of vfio_log_ops must handle
>    arbitrary user requests.  This includes rejecting subsequent calls
>    to log_start without an intervening log_stop, as well as graceful
>    handling of log_stop and log_read_and_clear from invalid states.
> 
> With something like that.
> 
> Acked-by: Alex Williamson <alex.williamson@redhat.com>
> 
> You can also add my Ack on 3, 4, and (fwiw) 6-10 as I assume this would
> be a PR from Leon.  Thanks,

Yes, it is.
I sent PR based on clean 6.0-rc4.

Thanks

> 
> Alex
> 
> > + */
> > +struct vfio_log_ops {
> > +	int (*log_start)(struct vfio_device *device,
> > +		struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
> > +	int (*log_stop)(struct vfio_device *device);
> > +	int (*log_read_and_clear)(struct vfio_device *device,
> > +		unsigned long iova, unsigned long length,
> > +		struct iova_bitmap *dirty);
> > +};
> > +
> >  /**
> >   * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
> >   * @flags: Arg from the device_feature op
>
diff mbox series

Patch

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 6130d00252ed..86c381ceb9a1 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -3,6 +3,7 @@  menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	select IOMMU_API
 	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+	select INTERVAL_TREE
 	help
 	  VFIO provides a framework for secure userspace device drivers.
 	  See Documentation/driver-api/vfio.rst for more details.
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 0d4b49f06b14..0a801aee2f2d 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -2128,6 +2128,11 @@  int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
 			return -EINVAL;
 	}
 
+	if (vdev->vdev.log_ops && !(vdev->vdev.log_ops->log_start &&
+	    vdev->vdev.log_ops->log_stop &&
+	    vdev->vdev.log_ops->log_read_and_clear))
+		return -EINVAL;
+
 	/*
 	 * Prevent binding to PFs with VFs enabled, the VFs might be in use
 	 * by the host or other users.  We cannot capture the VFs if they
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 77264d836d52..27d9186f35d5 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -33,6 +33,8 @@ 
 #include <linux/wait.h>
 #include <linux/sched/signal.h>
 #include <linux/pm_runtime.h>
+#include <linux/interval_tree.h>
+#include <linux/iova_bitmap.h>
 #include "vfio.h"
 
 #define DRIVER_VERSION	"0.3"
@@ -1658,6 +1660,167 @@  static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
 	return 0;
 }
 
+/* Ranges should fit into a single kernel page */
+#define LOG_MAX_RANGES \
+	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
+
+static int
+vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
+					u32 flags, void __user *arg,
+					size_t argsz)
+{
+	size_t minsz =
+		offsetofend(struct vfio_device_feature_dma_logging_control,
+			    ranges);
+	struct vfio_device_feature_dma_logging_range __user *ranges;
+	struct vfio_device_feature_dma_logging_control control;
+	struct vfio_device_feature_dma_logging_range range;
+	struct rb_root_cached root = RB_ROOT_CACHED;
+	struct interval_tree_node *nodes;
+	u64 iova_end;
+	u32 nnodes;
+	int i, ret;
+
+	if (!device->log_ops)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz,
+				 VFIO_DEVICE_FEATURE_SET,
+				 sizeof(control));
+	if (ret != 1)
+		return ret;
+
+	if (copy_from_user(&control, arg, minsz))
+		return -EFAULT;
+
+	nnodes = control.num_ranges;
+	if (!nnodes)
+		return -EINVAL;
+
+	if (nnodes > LOG_MAX_RANGES)
+		return -E2BIG;
+
+	ranges = u64_to_user_ptr(control.ranges);
+	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
+			      GFP_KERNEL);
+	if (!nodes)
+		return -ENOMEM;
+
+	for (i = 0; i < nnodes; i++) {
+		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
+			ret = -EFAULT;
+			goto end;
+		}
+		if (!IS_ALIGNED(range.iova, control.page_size) ||
+		    !IS_ALIGNED(range.length, control.page_size)) {
+			ret = -EINVAL;
+			goto end;
+		}
+
+		if (check_add_overflow(range.iova, range.length, &iova_end) ||
+		    iova_end > ULONG_MAX) {
+			ret = -EOVERFLOW;
+			goto end;
+		}
+
+		nodes[i].start = range.iova;
+		nodes[i].last = range.iova + range.length - 1;
+		if (interval_tree_iter_first(&root, nodes[i].start,
+					     nodes[i].last)) {
+			/* Range overlapping */
+			ret = -EINVAL;
+			goto end;
+		}
+		interval_tree_insert(nodes + i, &root);
+	}
+
+	ret = device->log_ops->log_start(device, &root, nnodes,
+					 &control.page_size);
+	if (ret)
+		goto end;
+
+	if (copy_to_user(arg, &control, sizeof(control))) {
+		ret = -EFAULT;
+		device->log_ops->log_stop(device);
+	}
+
+end:
+	kfree(nodes);
+	return ret;
+}
+
+static int
+vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
+				       u32 flags, void __user *arg,
+				       size_t argsz)
+{
+	int ret;
+
+	if (!device->log_ops)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz,
+				 VFIO_DEVICE_FEATURE_SET, 0);
+	if (ret != 1)
+		return ret;
+
+	return device->log_ops->log_stop(device);
+}
+
+static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
+					  unsigned long iova, size_t length,
+					  void *opaque)
+{
+	struct vfio_device *device = opaque;
+
+	return device->log_ops->log_read_and_clear(device, iova, length, iter);
+}
+
+static int
+vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
+					 u32 flags, void __user *arg,
+					 size_t argsz)
+{
+	size_t minsz =
+		offsetofend(struct vfio_device_feature_dma_logging_report,
+			    bitmap);
+	struct vfio_device_feature_dma_logging_report report;
+	struct iova_bitmap *iter;
+	u64 iova_end;
+	int ret;
+
+	if (!device->log_ops)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz,
+				 VFIO_DEVICE_FEATURE_GET,
+				 sizeof(report));
+	if (ret != 1)
+		return ret;
+
+	if (copy_from_user(&report, arg, minsz))
+		return -EFAULT;
+
+	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
+		return -EINVAL;
+
+	if (check_add_overflow(report.iova, report.length, &iova_end) ||
+	    iova_end > ULONG_MAX)
+		return -EOVERFLOW;
+
+	iter = iova_bitmap_alloc(report.iova, report.length,
+				 report.page_size,
+				 u64_to_user_ptr(report.bitmap));
+	if (IS_ERR(iter))
+		return PTR_ERR(iter);
+
+	ret = iova_bitmap_for_each(iter, device,
+				   vfio_device_log_read_and_clear);
+
+	iova_bitmap_free(iter);
+	return ret;
+}
+
 static int vfio_ioctl_device_feature(struct vfio_device *device,
 				     struct vfio_device_feature __user *arg)
 {
@@ -1691,6 +1854,18 @@  static int vfio_ioctl_device_feature(struct vfio_device *device,
 		return vfio_ioctl_device_feature_mig_device_state(
 			device, feature.flags, arg->data,
 			feature.argsz - minsz);
+	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
+		return vfio_ioctl_device_feature_logging_start(
+			device, feature.flags, arg->data,
+			feature.argsz - minsz);
+	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
+		return vfio_ioctl_device_feature_logging_stop(
+			device, feature.flags, arg->data,
+			feature.argsz - minsz);
+	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
+		return vfio_ioctl_device_feature_logging_report(
+			device, feature.flags, arg->data,
+			feature.argsz - minsz);
 	default:
 		if (unlikely(!device->ops->device_feature))
 			return -EINVAL;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e05ddc6fe6a5..b17f2f454389 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -14,6 +14,7 @@ 
 #include <linux/workqueue.h>
 #include <linux/poll.h>
 #include <uapi/linux/vfio.h>
+#include <linux/iova_bitmap.h>
 
 struct kvm;
 
@@ -33,10 +34,11 @@  struct vfio_device {
 	struct device *dev;
 	const struct vfio_device_ops *ops;
 	/*
-	 * mig_ops is a static property of the vfio_device which must be set
-	 * prior to registering the vfio_device.
+	 * mig_ops/log_ops is a static property of the vfio_device which must
+	 * be set prior to registering the vfio_device.
 	 */
 	const struct vfio_migration_ops *mig_ops;
+	const struct vfio_log_ops *log_ops;
 	struct vfio_group *group;
 	struct vfio_device_set *dev_set;
 	struct list_head dev_set_list;
@@ -108,6 +110,21 @@  struct vfio_migration_ops {
 				   enum vfio_device_mig_state *curr_state);
 };
 
+/**
+ * @log_start: Optional callback to ask the device start DMA logging.
+ * @log_stop: Optional callback to ask the device stop DMA logging.
+ * @log_read_and_clear: Optional callback to ask the device read
+ *         and clear the dirty DMAs in some given range.
+ */
+struct vfio_log_ops {
+	int (*log_start)(struct vfio_device *device,
+		struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
+	int (*log_stop)(struct vfio_device *device);
+	int (*log_read_and_clear)(struct vfio_device *device,
+		unsigned long iova, unsigned long length,
+		struct iova_bitmap *dirty);
+};
+
 /**
  * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
  * @flags: Arg from the device_feature op