
[v3,6/7] iommu/virtio: Add probe request

Message ID 20181012145917.6840-7-jean-philippe.brucker@arm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Series Add virtio-iommu driver

Commit Message

Jean-Philippe Brucker Oct. 12, 2018, 2:59 p.m. UTC
When the device offers the probe feature, send a probe request for each
device managed by the IOMMU. Extract RESV_MEM information. When we
encounter an MSI doorbell region, set it up as an IOMMU_RESV_MSI region.
This will tell other subsystems that there is no need to map the MSI
doorbell in the virtio-iommu, because MSIs bypass it.

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
---
 drivers/iommu/virtio-iommu.c      | 147 ++++++++++++++++++++++++++++--
 include/uapi/linux/virtio_iommu.h |  39 ++++++++
 2 files changed, 180 insertions(+), 6 deletions(-)

Comments

Michael S. Tsirkin Oct. 12, 2018, 4:42 p.m. UTC | #1
On Fri, Oct 12, 2018 at 03:59:16PM +0100, Jean-Philippe Brucker wrote:
> When the device offers the probe feature, send a probe request for each
> device managed by the IOMMU. Extract RESV_MEM information. When we
> encounter a MSI doorbell region, set it up as a IOMMU_RESV_MSI region.
> This will tell other subsystems that there is no need to map the MSI
> doorbell in the virtio-iommu, because MSIs bypass it.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> ---
>  drivers/iommu/virtio-iommu.c      | 147 ++++++++++++++++++++++++++++--
>  include/uapi/linux/virtio_iommu.h |  39 ++++++++
>  2 files changed, 180 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> index 9fb38cd3b727..8eaf66770469 100644
> --- a/drivers/iommu/virtio-iommu.c
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -56,6 +56,7 @@ struct viommu_dev {
>  	struct iommu_domain_geometry	geometry;
>  	u64				pgsize_bitmap;
>  	u8				domain_bits;
> +	u32				probe_size;
>  };
>  
>  struct viommu_mapping {
> @@ -77,8 +78,10 @@ struct viommu_domain {
>  };
>  
>  struct viommu_endpoint {
> +	struct device			*dev;
>  	struct viommu_dev		*viommu;
>  	struct viommu_domain		*vdomain;
> +	struct list_head		resv_regions;
>  };
>  
>  struct viommu_request {
> @@ -129,6 +132,9 @@ static off_t viommu_get_req_offset(struct viommu_dev *viommu,
>  {
>  	size_t tail_size = sizeof(struct virtio_iommu_req_tail);
>  
> +	if (req->type == VIRTIO_IOMMU_T_PROBE)
> +		return len - viommu->probe_size - tail_size;
> +
>  	return len - tail_size;
>  }
>  
> @@ -414,6 +420,101 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
>  	return ret;
>  }
>  
> +static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> +			       struct virtio_iommu_probe_resv_mem *mem,
> +			       size_t len)
> +{
> +	struct iommu_resv_region *region = NULL;
> +	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> +
> +	u64 start = le64_to_cpu(mem->start);
> +	u64 end = le64_to_cpu(mem->end);
> +	size_t size = end - start + 1;
> +
> +	if (len < sizeof(*mem))
> +		return -EINVAL;


Maybe validate that size, end and start all make sense
and e.g. fit within phys_addr_t and size_t?
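
A minimal sketch of the kind of check being suggested, as a hypothetical
helper (not part of this patch), assuming the u64 start/end read from the
descriptor:

	/*
	 * Hypothetical sanity check: reject a descriptor whose range is
	 * malformed or doesn't fit in phys_addr_t/size_t on this CPU.
	 */
	static int viommu_check_resv_range(u64 start, u64 end)
	{
		u64 size;

		if (start > end)
			return -EINVAL;

		size = end - start + 1;
		if (!size)			/* end - start + 1 wrapped around */
			return -EINVAL;

		if (start != (phys_addr_t)start || end != (phys_addr_t)end ||
		    size != (size_t)size)
			return -EOVERFLOW;	/* doesn't fit on this architecture */

		return 0;
	}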

> +
> +	switch (mem->subtype) {
> +	default:
> +		dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
> +			 mem->subtype);
> +		/* Fall-through */
> +	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
> +		region = iommu_alloc_resv_region(start, size, 0,
> +						 IOMMU_RESV_RESERVED);
> +		break;
> +	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
> +		region = iommu_alloc_resv_region(start, size, prot,
> +						 IOMMU_RESV_MSI);
> +		break;
> +	}
> +
> +	list_add(&vdev->resv_regions, &region->list);
> +	return 0;
> +}
> +
> +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
> +{
> +	int ret;
> +	u16 type, len;
> +	size_t cur = 0;
> +	size_t probe_len;
> +	struct virtio_iommu_req_probe *probe;
> +	struct virtio_iommu_probe_property *prop;
> +	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> +	struct viommu_endpoint *vdev = fwspec->iommu_priv;
> +
> +	if (!fwspec->num_ids)
> +		return -EINVAL;
> +
> +	probe_len = sizeof(*probe) + viommu->probe_size +
> +		    sizeof(struct virtio_iommu_req_tail);
> +	probe = kzalloc(probe_len, GFP_KERNEL);
> +	if (!probe)
> +		return -ENOMEM;
> +
> +	probe->head.type = VIRTIO_IOMMU_T_PROBE;
> +	/*
> +	 * For now, assume that properties of an endpoint that outputs multiple
> +	 * IDs are consistent. Only probe the first one.
> +	 */
> +	probe->endpoint = cpu_to_le32(fwspec->ids[0]);
> +
> +	ret = viommu_send_req_sync(viommu, probe, probe_len);
> +	if (ret)
> +		goto out_free;
> +
> +	prop = (void *)probe->properties;
> +	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
> +
> +	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
> +	       cur < viommu->probe_size) {
> +		len = le16_to_cpu(prop->length) + sizeof(*prop);
> +
> +		switch (type) {
> +		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> +			ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> +			break;
> +		default:
> +			dev_err(dev, "unknown viommu prop 0x%x\n", type);
> +		}
> +
> +		if (ret)
> +			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);
> +
> +		cur += len;
> +		if (cur >= viommu->probe_size)
> +			break;
> +
> +		prop = (void *)probe->properties + cur;
> +		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
> +	}
> +
> +out_free:
> +	kfree(probe);
> +	return ret;
> +}
> +
>  /* IOMMU API */
>  
>  static struct iommu_domain *viommu_domain_alloc(unsigned type)
> @@ -636,15 +737,33 @@ static void viommu_iotlb_sync(struct iommu_domain *domain)
>  
>  static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
>  {
> -	struct iommu_resv_region *region;
> +	struct iommu_resv_region *entry, *new_entry, *msi = NULL;
> +	struct viommu_endpoint *vdev = dev->iommu_fwspec->iommu_priv;
>  	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
>  
> -	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
> -					 IOMMU_RESV_SW_MSI);
> -	if (!region)
> -		return;
> +	list_for_each_entry(entry, &vdev->resv_regions, list) {
> +		/*
> +		 * If the device registered a bypass MSI windows, use it.
> +		 * Otherwise add a software-mapped region
> +		 */
> +		if (entry->type == IOMMU_RESV_MSI)
> +			msi = entry;
> +
> +		new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
> +		if (!new_entry)
> +			return;
> +		list_add_tail(&new_entry->list, head);
> +	}
> +
> +	if (!msi) {
> +		msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
> +					      prot, IOMMU_RESV_SW_MSI);
> +		if (!msi)
> +			return;
> +
> +		list_add_tail(&msi->list, head);
> +	}
>  
> -	list_add_tail(&region->list, head);
>  	iommu_dma_get_resv_regions(dev, head);
>  }
>  
> @@ -692,9 +811,18 @@ static int viommu_add_device(struct device *dev)
>  	if (!vdev)
>  		return -ENOMEM;
>  
> +	vdev->dev = dev;
>  	vdev->viommu = viommu;
> +	INIT_LIST_HEAD(&vdev->resv_regions);
>  	fwspec->iommu_priv = vdev;
>  
> +	if (viommu->probe_size) {
> +		/* Get additional information for this endpoint */
> +		ret = viommu_probe_endpoint(viommu, dev);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	ret = iommu_device_link(&viommu->iommu, dev);
>  	if (ret)
>  		goto err_free_dev;
> @@ -717,6 +845,7 @@ static int viommu_add_device(struct device *dev)
>  	iommu_device_unlink(&viommu->iommu, dev);
>  
>  err_free_dev:
> +	viommu_put_resv_regions(dev, &vdev->resv_regions);
>  	kfree(vdev);
>  
>  	return ret;
> @@ -734,6 +863,7 @@ static void viommu_remove_device(struct device *dev)
>  
>  	iommu_group_remove_device(dev);
>  	iommu_device_unlink(&vdev->viommu->iommu, dev);
> +	viommu_put_resv_regions(dev, &vdev->resv_regions);
>  	kfree(vdev);
>  }
>  
> @@ -832,6 +962,10 @@ static int viommu_probe(struct virtio_device *vdev)
>  			     struct virtio_iommu_config, domain_bits,
>  			     &viommu->domain_bits);
>  
> +	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
> +			     struct virtio_iommu_config, probe_size,
> +			     &viommu->probe_size);
> +
>  	viommu->geometry = (struct iommu_domain_geometry) {
>  		.aperture_start	= input_start,
>  		.aperture_end	= input_end,
> @@ -913,6 +1047,7 @@ static unsigned int features[] = {
>  	VIRTIO_IOMMU_F_MAP_UNMAP,
>  	VIRTIO_IOMMU_F_DOMAIN_BITS,
>  	VIRTIO_IOMMU_F_INPUT_RANGE,
> +	VIRTIO_IOMMU_F_PROBE,
>  };
>  
>  static struct virtio_device_id id_table[] = {
> diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
> index e808fc7fbe82..feed74586bb0 100644
> --- a/include/uapi/linux/virtio_iommu.h
> +++ b/include/uapi/linux/virtio_iommu.h
> @@ -14,6 +14,7 @@
>  #define VIRTIO_IOMMU_F_DOMAIN_BITS		1
>  #define VIRTIO_IOMMU_F_MAP_UNMAP		2
>  #define VIRTIO_IOMMU_F_BYPASS			3
> +#define VIRTIO_IOMMU_F_PROBE			4
>  
>  struct virtio_iommu_config {
>  	/* Supported page sizes */
> @@ -25,6 +26,9 @@ struct virtio_iommu_config {
>  	} input_range;
>  	/* Max domain ID size */
>  	__u8					domain_bits;
> +	__u8					padding[3];
> +	/* Probe buffer size */

Oh so you do add the padding in this patch, it's just a
question of moving this line to the earlier patch then.

> +	__u32					probe_size;
>  };
>  
>  /* Request types */
> @@ -32,6 +36,7 @@ struct virtio_iommu_config {
>  #define VIRTIO_IOMMU_T_DETACH			0x02
>  #define VIRTIO_IOMMU_T_MAP			0x03
>  #define VIRTIO_IOMMU_T_UNMAP			0x04
> +#define VIRTIO_IOMMU_T_PROBE			0x05
>  
>  /* Status types */
>  #define VIRTIO_IOMMU_S_OK			0x00
> @@ -98,4 +103,38 @@ struct virtio_iommu_req_unmap {
>  	struct virtio_iommu_req_tail		tail;
>  };
>  
> +#define VIRTIO_IOMMU_PROBE_T_NONE		0
> +#define VIRTIO_IOMMU_PROBE_T_RESV_MEM		1
> +
> +#define VIRTIO_IOMMU_PROBE_T_MASK		0xfff
> +
> +struct virtio_iommu_probe_property {
> +	__le16					type;
> +	__le16					length;
> +};
> +
> +#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED	0
> +#define VIRTIO_IOMMU_RESV_MEM_T_MSI		1
> +
> +struct virtio_iommu_probe_resv_mem {
> +	struct virtio_iommu_probe_property	head;
> +	__u8					subtype;
> +	__u8					reserved[3];
> +	__le64					start;
> +	__le64					end;
> +};
> +
> +struct virtio_iommu_req_probe {
> +	struct virtio_iommu_req_head		head;
> +	__le32					endpoint;
> +	__u8					reserved[64];
> +
> +	__u8					properties[];
> +
> +	/*
> +	 * Tail follows the variable-length properties array. No padding,
> +	 * property lengths are all aligned on 8 bytes.
> +	 */
> +};
> +
>  #endif
> -- 
> 2.19.1
Eric Auger Nov. 8, 2018, 2:48 p.m. UTC | #2
Hi Jean-Philippe,

On 10/12/18 4:59 PM, Jean-Philippe Brucker wrote:
> When the device offers the probe feature, send a probe request for each
> device managed by the IOMMU. Extract RESV_MEM information. When we
> encounter a MSI doorbell region, set it up as a IOMMU_RESV_MSI region.
> This will tell other subsystems that there is no need to map the MSI
> doorbell in the virtio-iommu, because MSIs bypass it.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> ---
>  drivers/iommu/virtio-iommu.c      | 147 ++++++++++++++++++++++++++++--
>  include/uapi/linux/virtio_iommu.h |  39 ++++++++
>  2 files changed, 180 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> index 9fb38cd3b727..8eaf66770469 100644
> --- a/drivers/iommu/virtio-iommu.c
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -56,6 +56,7 @@ struct viommu_dev {
>  	struct iommu_domain_geometry	geometry;
>  	u64				pgsize_bitmap;
>  	u8				domain_bits;
> +	u32				probe_size;
>  };
>  
>  struct viommu_mapping {
> @@ -77,8 +78,10 @@ struct viommu_domain {
>  };
>  
>  struct viommu_endpoint {
> +	struct device			*dev;
>  	struct viommu_dev		*viommu;
>  	struct viommu_domain		*vdomain;
> +	struct list_head		resv_regions;
>  };
>  
>  struct viommu_request {
> @@ -129,6 +132,9 @@ static off_t viommu_get_req_offset(struct viommu_dev *viommu,
>  {
>  	size_t tail_size = sizeof(struct virtio_iommu_req_tail);
>  
> +	if (req->type == VIRTIO_IOMMU_T_PROBE)
> +		return len - viommu->probe_size - tail_size;
> +
>  	return len - tail_size;
>  }
>  
> @@ -414,6 +420,101 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
>  	return ret;
>  }
>  
> +static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> +			       struct virtio_iommu_probe_resv_mem *mem,
> +			       size_t len)
> +{
> +	struct iommu_resv_region *region = NULL;
> +	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> +
> +	u64 start = le64_to_cpu(mem->start);
> +	u64 end = le64_to_cpu(mem->end);
> +	size_t size = end - start + 1;
> +
> +	if (len < sizeof(*mem))
> +		return -EINVAL;
> +
> +	switch (mem->subtype) {
> +	default:
> +		dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
> +			 mem->subtype);
> +		/* Fall-through */
> +	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
> +		region = iommu_alloc_resv_region(start, size, 0,
> +						 IOMMU_RESV_RESERVED);
> +		break;
> +	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
> +		region = iommu_alloc_resv_region(start, size, prot,
> +						 IOMMU_RESV_MSI);
> +		break;
> +	}
> +
> +	list_add(&vdev->resv_regions, &region->list);
> +	return 0;
> +}
> +
> +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
> +{
> +	int ret;
> +	u16 type, len;
> +	size_t cur = 0;
> +	size_t probe_len;
> +	struct virtio_iommu_req_probe *probe;
> +	struct virtio_iommu_probe_property *prop;
> +	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> +	struct viommu_endpoint *vdev = fwspec->iommu_priv;
> +
> +	if (!fwspec->num_ids)
> +		return -EINVAL;
> +
> +	probe_len = sizeof(*probe) + viommu->probe_size +
> +		    sizeof(struct virtio_iommu_req_tail);
> +	probe = kzalloc(probe_len, GFP_KERNEL);
> +	if (!probe)
> +		return -ENOMEM;
> +
> +	probe->head.type = VIRTIO_IOMMU_T_PROBE;
> +	/*
> +	 * For now, assume that properties of an endpoint that outputs multiple
> +	 * IDs are consistent. Only probe the first one.
> +	 */
> +	probe->endpoint = cpu_to_le32(fwspec->ids[0]);
> +
> +	ret = viommu_send_req_sync(viommu, probe, probe_len);
> +	if (ret)
> +		goto out_free;
> +
> +	prop = (void *)probe->properties;
> +	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
> +
> +	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
> +	       cur < viommu->probe_size) {
> +		len = le16_to_cpu(prop->length) + sizeof(*prop);
> +
> +		switch (type) {
> +		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> +			ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> +			break;
> +		default:
> +			dev_err(dev, "unknown viommu prop 0x%x\n", type);
> +		}
> +
> +		if (ret)
> +			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);
> +
> +		cur += len;
> +		if (cur >= viommu->probe_size)
> +			break;
> +
> +		prop = (void *)probe->properties + cur;
> +		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
> +	}
> +
> +out_free:
> +	kfree(probe);
> +	return ret;
> +}
> +
>  /* IOMMU API */
>  
>  static struct iommu_domain *viommu_domain_alloc(unsigned type)
> @@ -636,15 +737,33 @@ static void viommu_iotlb_sync(struct iommu_domain *domain)
>  
>  static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
>  {
> -	struct iommu_resv_region *region;
> +	struct iommu_resv_region *entry, *new_entry, *msi = NULL;
> +	struct viommu_endpoint *vdev = dev->iommu_fwspec->iommu_priv;
>  	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
>  
> -	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
> -					 IOMMU_RESV_SW_MSI);
> -	if (!region)
> -		return;
> +	list_for_each_entry(entry, &vdev->resv_regions, list) {
> +		/*
> +		 * If the device registered a bypass MSI windows, use it.
> +		 * Otherwise add a software-mapped region
> +		 */
> +		if (entry->type == IOMMU_RESV_MSI)
> +			msi = entry;
> +
> +		new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
> +		if (!new_entry)
> +			return;
> +		list_add_tail(&new_entry->list, head);
> +	}
> +
> +	if (!msi) {
> +		msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
> +					      prot, IOMMU_RESV_SW_MSI);
> +		if (!msi)
> +			return;
> +
> +		list_add_tail(&msi->list, head);
> +	}
>  
> -	list_add_tail(&region->list, head);
>  	iommu_dma_get_resv_regions(dev, head);
>  }
>  
> @@ -692,9 +811,18 @@ static int viommu_add_device(struct device *dev)
>  	if (!vdev)
>  		return -ENOMEM;
>  
> +	vdev->dev = dev;
>  	vdev->viommu = viommu;
> +	INIT_LIST_HEAD(&vdev->resv_regions);
>  	fwspec->iommu_priv = vdev;
>  
> +	if (viommu->probe_size) {
> +		/* Get additional information for this endpoint */
> +		ret = viommu_probe_endpoint(viommu, dev);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	ret = iommu_device_link(&viommu->iommu, dev);
>  	if (ret)
>  		goto err_free_dev;
> @@ -717,6 +845,7 @@ static int viommu_add_device(struct device *dev)
>  	iommu_device_unlink(&viommu->iommu, dev);
>  
>  err_free_dev:
> +	viommu_put_resv_regions(dev, &vdev->resv_regions);
>  	kfree(vdev);
>  
>  	return ret;
> @@ -734,6 +863,7 @@ static void viommu_remove_device(struct device *dev)
>  
>  	iommu_group_remove_device(dev);
>  	iommu_device_unlink(&vdev->viommu->iommu, dev);
> +	viommu_put_resv_regions(dev, &vdev->resv_regions);
>  	kfree(vdev);
>  }
>  
> @@ -832,6 +962,10 @@ static int viommu_probe(struct virtio_device *vdev)
>  			     struct virtio_iommu_config, domain_bits,
>  			     &viommu->domain_bits);
>  
> +	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
> +			     struct virtio_iommu_config, probe_size,
> +			     &viommu->probe_size);
> +
>  	viommu->geometry = (struct iommu_domain_geometry) {
>  		.aperture_start	= input_start,
>  		.aperture_end	= input_end,
> @@ -913,6 +1047,7 @@ static unsigned int features[] = {
>  	VIRTIO_IOMMU_F_MAP_UNMAP,
>  	VIRTIO_IOMMU_F_DOMAIN_BITS,
>  	VIRTIO_IOMMU_F_INPUT_RANGE,
> +	VIRTIO_IOMMU_F_PROBE,
>  };
>  
>  static struct virtio_device_id id_table[] = {
> diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
> index e808fc7fbe82..feed74586bb0 100644
> --- a/include/uapi/linux/virtio_iommu.h
> +++ b/include/uapi/linux/virtio_iommu.h
> @@ -14,6 +14,7 @@
>  #define VIRTIO_IOMMU_F_DOMAIN_BITS		1
>  #define VIRTIO_IOMMU_F_MAP_UNMAP		2
>  #define VIRTIO_IOMMU_F_BYPASS			3
> +#define VIRTIO_IOMMU_F_PROBE			4
>  
>  struct virtio_iommu_config {
>  	/* Supported page sizes */
> @@ -25,6 +26,9 @@ struct virtio_iommu_config {
>  	} input_range;
>  	/* Max domain ID size */
>  	__u8					domain_bits;
> +	__u8					padding[3];
> +	/* Probe buffer size */
> +	__u32					probe_size;
>  };
>  
>  /* Request types */
> @@ -32,6 +36,7 @@ struct virtio_iommu_config {
>  #define VIRTIO_IOMMU_T_DETACH			0x02
>  #define VIRTIO_IOMMU_T_MAP			0x03
>  #define VIRTIO_IOMMU_T_UNMAP			0x04
> +#define VIRTIO_IOMMU_T_PROBE			0x05
>  
>  /* Status types */
>  #define VIRTIO_IOMMU_S_OK			0x00
> @@ -98,4 +103,38 @@ struct virtio_iommu_req_unmap {
>  	struct virtio_iommu_req_tail		tail;
>  };
>  
> +#define VIRTIO_IOMMU_PROBE_T_NONE		0
> +#define VIRTIO_IOMMU_PROBE_T_RESV_MEM		1
> +
> +#define VIRTIO_IOMMU_PROBE_T_MASK		0xfff
> +
> +struct virtio_iommu_probe_property {
> +	__le16					type;
> +	__le16					length;
the value[] field has disappeared but is still documented in the v0.8 spec.

Thanks

Eric
> +};
> +
> +#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED	0
> +#define VIRTIO_IOMMU_RESV_MEM_T_MSI		1
> +
> +struct virtio_iommu_probe_resv_mem {
> +	struct virtio_iommu_probe_property	head;
> +	__u8					subtype;
> +	__u8					reserved[3];
> +	__le64					start;
> +	__le64					end;
> +};
> +
> +struct virtio_iommu_req_probe {
> +	struct virtio_iommu_req_head		head;
> +	__le32					endpoint;
> +	__u8					reserved[64];
> +
> +	__u8					properties[];
> +
> +	/*
> +	 * Tail follows the variable-length properties array. No padding,
> +	 * property lengths are all aligned on 8 bytes.
> +	 */
> +};
> +
>  #endif
>
Jean-Philippe Brucker Nov. 8, 2018, 4:46 p.m. UTC | #3
On 08/11/2018 14:48, Auger Eric wrote:
>> +struct virtio_iommu_probe_property {
>> +	__le16					type;
>> +	__le16					length;
> the value[] field has disappeared but is still documented in the v0.8 spec.

Good catch. I removed value[] when reworking the
virtio_iommu_probe_resv_mem definition, because embedding a struct with
a flexible array into another violates the C99 standard, even though GCC
accepts it.

I'll remove it from the spec as well, but I probably won't publish a new
version for this change alone. The virtio spec itself has similar uses
of flexible arrays, which are given for explanation but aren't valid C
(and those wouldn't even compile).

Thanks,
Jean
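
To illustrate the point, a sketch of what the v0.8 spec describes versus what
the header in this patch can express (illustration only, not proposed code):

	/* Property header as documented in the v0.8 spec, with value[]: */
	struct virtio_iommu_probe_property {
		__le16					type;
		__le16					length;
		__u8					value[];	/* flexible array member */
	};

	/*
	 * C99 (6.7.2.1) forbids embedding a struct that ends with a flexible
	 * array member inside another struct, so the following is invalid,
	 * even though GCC accepts it as an extension:
	 */
	struct virtio_iommu_probe_resv_mem {
		struct virtio_iommu_probe_property	head;	/* head ends with value[] */
		__u8					subtype;
		__u8					reserved[3];
		__le64					start;
		__le64					end;
	};
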
Eric Auger Nov. 15, 2018, 1:20 p.m. UTC | #4
Hi Jean,
On 10/12/18 4:59 PM, Jean-Philippe Brucker wrote:
> When the device offers the probe feature, send a probe request for each
> device managed by the IOMMU. Extract RESV_MEM information. When we
> encounter a MSI doorbell region, set it up as a IOMMU_RESV_MSI region.
> This will tell other subsystems that there is no need to map the MSI
> doorbell in the virtio-iommu, because MSIs bypass it.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> ---
>  drivers/iommu/virtio-iommu.c      | 147 ++++++++++++++++++++++++++++--
>  include/uapi/linux/virtio_iommu.h |  39 ++++++++
>  2 files changed, 180 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> index 9fb38cd3b727..8eaf66770469 100644
> --- a/drivers/iommu/virtio-iommu.c
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -56,6 +56,7 @@ struct viommu_dev {
>  	struct iommu_domain_geometry	geometry;
>  	u64				pgsize_bitmap;
>  	u8				domain_bits;
> +	u32				probe_size;
>  };
>  
>  struct viommu_mapping {
> @@ -77,8 +78,10 @@ struct viommu_domain {
>  };
>  
>  struct viommu_endpoint {
> +	struct device			*dev;
>  	struct viommu_dev		*viommu;
>  	struct viommu_domain		*vdomain;
> +	struct list_head		resv_regions;
>  };
>  
>  struct viommu_request {
> @@ -129,6 +132,9 @@ static off_t viommu_get_req_offset(struct viommu_dev *viommu,
>  {
>  	size_t tail_size = sizeof(struct virtio_iommu_req_tail);
>  
> +	if (req->type == VIRTIO_IOMMU_T_PROBE)
> +		return len - viommu->probe_size - tail_size;
> +
>  	return len - tail_size;
>  }
>  
> @@ -414,6 +420,101 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
>  	return ret;
>  }
>  
> +static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> +			       struct virtio_iommu_probe_resv_mem *mem,
> +			       size_t len)
> +{
> +	struct iommu_resv_region *region = NULL;
> +	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> +
nit: extra void line
> +	u64 start = le64_to_cpu(mem->start);
> +	u64 end = le64_to_cpu(mem->end);
> +	size_t size = end - start + 1;
> +
> +	if (len < sizeof(*mem))
> +		return -EINVAL;
> +
> +	switch (mem->subtype) {
> +	default:
> +		dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
> +			 mem->subtype);
> +		/* Fall-through */
> +	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
> +		region = iommu_alloc_resv_region(start, size, 0,
> +						 IOMMU_RESV_RESERVED);
need to test region
> +		break;
> +	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
> +		region = iommu_alloc_resv_region(start, size, prot,
> +						 IOMMU_RESV_MSI);
same
> +		break;
> +	}
> +
> +	list_add(&vdev->resv_regions, &region->list);
> +	return 0;
> +}
> +
> +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
> +{
> +	int ret;
> +	u16 type, len;
> +	size_t cur = 0;
> +	size_t probe_len;
> +	struct virtio_iommu_req_probe *probe;
> +	struct virtio_iommu_probe_property *prop;
> +	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> +	struct viommu_endpoint *vdev = fwspec->iommu_priv;
> +
> +	if (!fwspec->num_ids)
> +		return -EINVAL;
> +
> +	probe_len = sizeof(*probe) + viommu->probe_size +
> +		    sizeof(struct virtio_iommu_req_tail);
> +	probe = kzalloc(probe_len, GFP_KERNEL);
> +	if (!probe)
> +		return -ENOMEM;
> +
> +	probe->head.type = VIRTIO_IOMMU_T_PROBE;
> +	/*
> +	 * For now, assume that properties of an endpoint that outputs multiple
> +	 * IDs are consistent. Only probe the first one.
> +	 */
> +	probe->endpoint = cpu_to_le32(fwspec->ids[0]);
> +
> +	ret = viommu_send_req_sync(viommu, probe, probe_len);
> +	if (ret)
> +		goto out_free;
> +
> +	prop = (void *)probe->properties;
> +	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
> +
> +	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
> +	       cur < viommu->probe_size) {
> +		len = le16_to_cpu(prop->length) + sizeof(*prop);
> +
> +		switch (type) {
> +		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> +			ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> +			break;
> +		default:
> +			dev_err(dev, "unknown viommu prop 0x%x\n", type);
> +		}
> +
> +		if (ret)
> +			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);
> +
> +		cur += len;
> +		if (cur >= viommu->probe_size)
> +			break;
> +
> +		prop = (void *)probe->properties + cur;
> +		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
> +	}
> +
> +out_free:
> +	kfree(probe);
> +	return ret;
> +}
> +
>  /* IOMMU API */
>  
>  static struct iommu_domain *viommu_domain_alloc(unsigned type)
> @@ -636,15 +737,33 @@ static void viommu_iotlb_sync(struct iommu_domain *domain)
>  
>  static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
>  {
> -	struct iommu_resv_region *region;
> +	struct iommu_resv_region *entry, *new_entry, *msi = NULL;
> +	struct viommu_endpoint *vdev = dev->iommu_fwspec->iommu_priv;
>  	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
>  
> -	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
> -					 IOMMU_RESV_SW_MSI);
> -	if (!region)
> -		return;
> +	list_for_each_entry(entry, &vdev->resv_regions, list) {
> +		/*
> +		 * If the device registered a bypass MSI windows, use it.
any bypass MSI windows?
> +		 * Otherwise add a software-mapped region
> +		 */
> +		if (entry->type == IOMMU_RESV_MSI)
> +			msi = entry;
> +
> +		new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
> +		if (!new_entry)
> +			return;
> +		list_add_tail(&new_entry->list, head);
> +	}
> +
> +	if (!msi) {
> +		msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
> +					      prot, IOMMU_RESV_SW_MSI);
> +		if (!msi)
> +			return;
> +
> +		list_add_tail(&msi->list, head);
> +	}
>  
> -	list_add_tail(&region->list, head);
>  	iommu_dma_get_resv_regions(dev, head);
>  }
>  
> @@ -692,9 +811,18 @@ static int viommu_add_device(struct device *dev)
>  	if (!vdev)
>  		return -ENOMEM;
>  
> +	vdev->dev = dev;
>  	vdev->viommu = viommu;
> +	INIT_LIST_HEAD(&vdev->resv_regions);
>  	fwspec->iommu_priv = vdev;
>  
> +	if (viommu->probe_size) {
> +		/* Get additional information for this endpoint */
> +		ret = viommu_probe_endpoint(viommu, dev);
> +		if (ret)
leaks vdev
> +			return ret;
> +	}
> +
>  	ret = iommu_device_link(&viommu->iommu, dev);
>  	if (ret)
>  		goto err_free_dev;
> @@ -717,6 +845,7 @@ static int viommu_add_device(struct device *dev)
>  	iommu_device_unlink(&viommu->iommu, dev);
>  
>  err_free_dev:
> +	viommu_put_resv_regions(dev, &vdev->resv_regions);
>  	kfree(vdev);
>  
>  	return ret;
> @@ -734,6 +863,7 @@ static void viommu_remove_device(struct device *dev)
>  
>  	iommu_group_remove_device(dev);
>  	iommu_device_unlink(&vdev->viommu->iommu, dev);
> +	viommu_put_resv_regions(dev, &vdev->resv_regions);
>  	kfree(vdev);
>  }
>  
> @@ -832,6 +962,10 @@ static int viommu_probe(struct virtio_device *vdev)
>  			     struct virtio_iommu_config, domain_bits,
>  			     &viommu->domain_bits);
>  
> +	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
> +			     struct virtio_iommu_config, probe_size,
> +			     &viommu->probe_size);
> +
>  	viommu->geometry = (struct iommu_domain_geometry) {
>  		.aperture_start	= input_start,
>  		.aperture_end	= input_end,
> @@ -913,6 +1047,7 @@ static unsigned int features[] = {
>  	VIRTIO_IOMMU_F_MAP_UNMAP,
>  	VIRTIO_IOMMU_F_DOMAIN_BITS,
>  	VIRTIO_IOMMU_F_INPUT_RANGE,
> +	VIRTIO_IOMMU_F_PROBE,
>  };
>  
>  static struct virtio_device_id id_table[] = {
> diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
> index e808fc7fbe82..feed74586bb0 100644
> --- a/include/uapi/linux/virtio_iommu.h
> +++ b/include/uapi/linux/virtio_iommu.h
> @@ -14,6 +14,7 @@
>  #define VIRTIO_IOMMU_F_DOMAIN_BITS		1
>  #define VIRTIO_IOMMU_F_MAP_UNMAP		2
>  #define VIRTIO_IOMMU_F_BYPASS			3
> +#define VIRTIO_IOMMU_F_PROBE			4
>  
>  struct virtio_iommu_config {
>  	/* Supported page sizes */
> @@ -25,6 +26,9 @@ struct virtio_iommu_config {
>  	} input_range;
>  	/* Max domain ID size */
>  	__u8					domain_bits;
> +	__u8					padding[3];
> +	/* Probe buffer size */
> +	__u32					probe_size;
>  };
>  
>  /* Request types */
> @@ -32,6 +36,7 @@ struct virtio_iommu_config {
>  #define VIRTIO_IOMMU_T_DETACH			0x02
>  #define VIRTIO_IOMMU_T_MAP			0x03
>  #define VIRTIO_IOMMU_T_UNMAP			0x04
> +#define VIRTIO_IOMMU_T_PROBE			0x05
>  
>  /* Status types */
>  #define VIRTIO_IOMMU_S_OK			0x00
> @@ -98,4 +103,38 @@ struct virtio_iommu_req_unmap {
>  	struct virtio_iommu_req_tail		tail;
>  };
>  
> +#define VIRTIO_IOMMU_PROBE_T_NONE		0
> +#define VIRTIO_IOMMU_PROBE_T_RESV_MEM		1
> +
> +#define VIRTIO_IOMMU_PROBE_T_MASK		0xfff
> +
> +struct virtio_iommu_probe_property {
> +	__le16					type;
> +	__le16					length;
> +};
> +
> +#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED	0
> +#define VIRTIO_IOMMU_RESV_MEM_T_MSI		1
> +
> +struct virtio_iommu_probe_resv_mem {
> +	struct virtio_iommu_probe_property	head;
> +	__u8					subtype;
> +	__u8					reserved[3];
> +	__le64					start;
> +	__le64					end;
> +};
> +
> +struct virtio_iommu_req_probe {
> +	struct virtio_iommu_req_head		head;
> +	__le32					endpoint;
> +	__u8					reserved[64];
> +
> +	__u8					properties[];
> +
> +	/*
> +	 * Tail follows the variable-length properties array. No padding,
> +	 * property lengths are all aligned on 8 bytes.
> +	 */
> +};
> +
>  #endif
> 

Thanks

Eric
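
For reference, a rough sketch of how the error paths flagged above might be
fixed (hypothetical code, not the actual follow-up changes):

	/* In viommu_add_resv_mem(), check the allocation after the switch: */
	if (!region)
		return -ENOMEM;

	/* In viommu_add_device(), avoid leaking vdev when the probe fails: */
	if (viommu->probe_size) {
		/* Get additional information for this endpoint */
		ret = viommu_probe_endpoint(viommu, dev);
		if (ret)
			goto err_free_dev;
	}
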
Jean-Philippe Brucker Nov. 15, 2018, 4:22 p.m. UTC | #5
Fixed all of these, thanks

Jean

On 15/11/2018 13:20, Auger Eric wrote:
> Hi Jean,
> On 10/12/18 4:59 PM, Jean-Philippe Brucker wrote:
>> When the device offers the probe feature, send a probe request for each
>> device managed by the IOMMU. Extract RESV_MEM information. When we
>> encounter a MSI doorbell region, set it up as a IOMMU_RESV_MSI region.
>> This will tell other subsystems that there is no need to map the MSI
>> doorbell in the virtio-iommu, because MSIs bypass it.
>>
>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>> ---
>>  drivers/iommu/virtio-iommu.c      | 147 ++++++++++++++++++++++++++++--
>>  include/uapi/linux/virtio_iommu.h |  39 ++++++++
>>  2 files changed, 180 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
>> index 9fb38cd3b727..8eaf66770469 100644
>> --- a/drivers/iommu/virtio-iommu.c
>> +++ b/drivers/iommu/virtio-iommu.c
>> @@ -56,6 +56,7 @@ struct viommu_dev {
>>  	struct iommu_domain_geometry	geometry;
>>  	u64				pgsize_bitmap;
>>  	u8				domain_bits;
>> +	u32				probe_size;
>>  };
>>  
>>  struct viommu_mapping {
>> @@ -77,8 +78,10 @@ struct viommu_domain {
>>  };
>>  
>>  struct viommu_endpoint {
>> +	struct device			*dev;
>>  	struct viommu_dev		*viommu;
>>  	struct viommu_domain		*vdomain;
>> +	struct list_head		resv_regions;
>>  };
>>  
>>  struct viommu_request {
>> @@ -129,6 +132,9 @@ static off_t viommu_get_req_offset(struct viommu_dev *viommu,
>>  {
>>  	size_t tail_size = sizeof(struct virtio_iommu_req_tail);
>>  
>> +	if (req->type == VIRTIO_IOMMU_T_PROBE)
>> +		return len - viommu->probe_size - tail_size;
>> +
>>  	return len - tail_size;
>>  }
>>  
>> @@ -414,6 +420,101 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
>>  	return ret;
>>  }
>>  
>> +static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
>> +			       struct virtio_iommu_probe_resv_mem *mem,
>> +			       size_t len)
>> +{
>> +	struct iommu_resv_region *region = NULL;
>> +	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
>> +
> nit: extra void line
>> +	u64 start = le64_to_cpu(mem->start);
>> +	u64 end = le64_to_cpu(mem->end);
>> +	size_t size = end - start + 1;
>> +
>> +	if (len < sizeof(*mem))
>> +		return -EINVAL;
>> +
>> +	switch (mem->subtype) {
>> +	default:
>> +		dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
>> +			 mem->subtype);
>> +		/* Fall-through */
>> +	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
>> +		region = iommu_alloc_resv_region(start, size, 0,
>> +						 IOMMU_RESV_RESERVED);
> need to test region
>> +		break;
>> +	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
>> +		region = iommu_alloc_resv_region(start, size, prot,
>> +						 IOMMU_RESV_MSI);
> same
>> +		break;
>> +	}
>> +
>> +	list_add(&vdev->resv_regions, &region->list);
>> +	return 0;
>> +}
>> +
>> +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
>> +{
>> +	int ret;
>> +	u16 type, len;
>> +	size_t cur = 0;
>> +	size_t probe_len;
>> +	struct virtio_iommu_req_probe *probe;
>> +	struct virtio_iommu_probe_property *prop;
>> +	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
>> +	struct viommu_endpoint *vdev = fwspec->iommu_priv;
>> +
>> +	if (!fwspec->num_ids)
>> +		return -EINVAL;
>> +
>> +	probe_len = sizeof(*probe) + viommu->probe_size +
>> +		    sizeof(struct virtio_iommu_req_tail);
>> +	probe = kzalloc(probe_len, GFP_KERNEL);
>> +	if (!probe)
>> +		return -ENOMEM;
>> +
>> +	probe->head.type = VIRTIO_IOMMU_T_PROBE;
>> +	/*
>> +	 * For now, assume that properties of an endpoint that outputs multiple
>> +	 * IDs are consistent. Only probe the first one.
>> +	 */
>> +	probe->endpoint = cpu_to_le32(fwspec->ids[0]);
>> +
>> +	ret = viommu_send_req_sync(viommu, probe, probe_len);
>> +	if (ret)
>> +		goto out_free;
>> +
>> +	prop = (void *)probe->properties;
>> +	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
>> +
>> +	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
>> +	       cur < viommu->probe_size) {
>> +		len = le16_to_cpu(prop->length) + sizeof(*prop);
>> +
>> +		switch (type) {
>> +		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
>> +			ret = viommu_add_resv_mem(vdev, (void *)prop, len);
>> +			break;
>> +		default:
>> +			dev_err(dev, "unknown viommu prop 0x%x\n", type);
>> +		}
>> +
>> +		if (ret)
>> +			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);
>> +
>> +		cur += len;
>> +		if (cur >= viommu->probe_size)
>> +			break;
>> +
>> +		prop = (void *)probe->properties + cur;
>> +		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
>> +	}
>> +
>> +out_free:
>> +	kfree(probe);
>> +	return ret;
>> +}
>> +
>>  /* IOMMU API */
>>  
>>  static struct iommu_domain *viommu_domain_alloc(unsigned type)
>> @@ -636,15 +737,33 @@ static void viommu_iotlb_sync(struct iommu_domain *domain)
>>  
>>  static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
>>  {
>> -	struct iommu_resv_region *region;
>> +	struct iommu_resv_region *entry, *new_entry, *msi = NULL;
>> +	struct viommu_endpoint *vdev = dev->iommu_fwspec->iommu_priv;
>>  	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
>>  
>> -	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
>> -					 IOMMU_RESV_SW_MSI);
>> -	if (!region)
>> -		return;
>> +	list_for_each_entry(entry, &vdev->resv_regions, list) {
>> +		/*
>> +		 * If the device registered a bypass MSI windows, use it.
> any bypass MSI windows?
>> +		 * Otherwise add a software-mapped region
>> +		 */
>> +		if (entry->type == IOMMU_RESV_MSI)
>> +			msi = entry;
>> +
>> +		new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
>> +		if (!new_entry)
>> +			return;
>> +		list_add_tail(&new_entry->list, head);
>> +	}
>> +
>> +	if (!msi) {
>> +		msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
>> +					      prot, IOMMU_RESV_SW_MSI);
>> +		if (!msi)
>> +			return;
>> +
>> +		list_add_tail(&msi->list, head);
>> +	}
>>  
>> -	list_add_tail(&region->list, head);
>>  	iommu_dma_get_resv_regions(dev, head);
>>  }
>>  
>> @@ -692,9 +811,18 @@ static int viommu_add_device(struct device *dev)
>>  	if (!vdev)
>>  		return -ENOMEM;
>>  
>> +	vdev->dev = dev;
>>  	vdev->viommu = viommu;
>> +	INIT_LIST_HEAD(&vdev->resv_regions);
>>  	fwspec->iommu_priv = vdev;
>>  
>> +	if (viommu->probe_size) {
>> +		/* Get additional information for this endpoint */
>> +		ret = viommu_probe_endpoint(viommu, dev);
>> +		if (ret)
> leaks vdev
>> +			return ret;
>> +	}
>> +
>>  	ret = iommu_device_link(&viommu->iommu, dev);
>>  	if (ret)
>>  		goto err_free_dev;
>> @@ -717,6 +845,7 @@ static int viommu_add_device(struct device *dev)
>>  	iommu_device_unlink(&viommu->iommu, dev);
>>  
>>  err_free_dev:
>> +	viommu_put_resv_regions(dev, &vdev->resv_regions);
>>  	kfree(vdev);
>>  
>>  	return ret;
>> @@ -734,6 +863,7 @@ static void viommu_remove_device(struct device *dev)
>>  
>>  	iommu_group_remove_device(dev);
>>  	iommu_device_unlink(&vdev->viommu->iommu, dev);
>> +	viommu_put_resv_regions(dev, &vdev->resv_regions);
>>  	kfree(vdev);
>>  }
>>  
>> @@ -832,6 +962,10 @@ static int viommu_probe(struct virtio_device *vdev)
>>  			     struct virtio_iommu_config, domain_bits,
>>  			     &viommu->domain_bits);
>>  
>> +	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
>> +			     struct virtio_iommu_config, probe_size,
>> +			     &viommu->probe_size);
>> +
>>  	viommu->geometry = (struct iommu_domain_geometry) {
>>  		.aperture_start	= input_start,
>>  		.aperture_end	= input_end,
>> @@ -913,6 +1047,7 @@ static unsigned int features[] = {
>>  	VIRTIO_IOMMU_F_MAP_UNMAP,
>>  	VIRTIO_IOMMU_F_DOMAIN_BITS,
>>  	VIRTIO_IOMMU_F_INPUT_RANGE,
>> +	VIRTIO_IOMMU_F_PROBE,
>>  };
>>  
>>  static struct virtio_device_id id_table[] = {
>> diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
>> index e808fc7fbe82..feed74586bb0 100644
>> --- a/include/uapi/linux/virtio_iommu.h
>> +++ b/include/uapi/linux/virtio_iommu.h
>> @@ -14,6 +14,7 @@
>>  #define VIRTIO_IOMMU_F_DOMAIN_BITS		1
>>  #define VIRTIO_IOMMU_F_MAP_UNMAP		2
>>  #define VIRTIO_IOMMU_F_BYPASS			3
>> +#define VIRTIO_IOMMU_F_PROBE			4
>>  
>>  struct virtio_iommu_config {
>>  	/* Supported page sizes */
>> @@ -25,6 +26,9 @@ struct virtio_iommu_config {
>>  	} input_range;
>>  	/* Max domain ID size */
>>  	__u8					domain_bits;
>> +	__u8					padding[3];
>> +	/* Probe buffer size */
>> +	__u32					probe_size;
>>  };
>>  
>>  /* Request types */
>> @@ -32,6 +36,7 @@ struct virtio_iommu_config {
>>  #define VIRTIO_IOMMU_T_DETACH			0x02
>>  #define VIRTIO_IOMMU_T_MAP			0x03
>>  #define VIRTIO_IOMMU_T_UNMAP			0x04
>> +#define VIRTIO_IOMMU_T_PROBE			0x05
>>  
>>  /* Status types */
>>  #define VIRTIO_IOMMU_S_OK			0x00
>> @@ -98,4 +103,38 @@ struct virtio_iommu_req_unmap {
>>  	struct virtio_iommu_req_tail		tail;
>>  };
>>  
>> +#define VIRTIO_IOMMU_PROBE_T_NONE		0
>> +#define VIRTIO_IOMMU_PROBE_T_RESV_MEM		1
>> +
>> +#define VIRTIO_IOMMU_PROBE_T_MASK		0xfff
>> +
>> +struct virtio_iommu_probe_property {
>> +	__le16					type;
>> +	__le16					length;
>> +};
>> +
>> +#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED	0
>> +#define VIRTIO_IOMMU_RESV_MEM_T_MSI		1
>> +
>> +struct virtio_iommu_probe_resv_mem {
>> +	struct virtio_iommu_probe_property	head;
>> +	__u8					subtype;
>> +	__u8					reserved[3];
>> +	__le64					start;
>> +	__le64					end;
>> +};
>> +
>> +struct virtio_iommu_req_probe {
>> +	struct virtio_iommu_req_head		head;
>> +	__le32					endpoint;
>> +	__u8					reserved[64];
>> +
>> +	__u8					properties[];
>> +
>> +	/*
>> +	 * Tail follows the variable-length properties array. No padding,
>> +	 * property lengths are all aligned on 8 bytes.
>> +	 */
>> +};
>> +
>>  #endif
>>
> 
> Thanks
> 
> Eric

Patch

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 9fb38cd3b727..8eaf66770469 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -56,6 +56,7 @@  struct viommu_dev {
 	struct iommu_domain_geometry	geometry;
 	u64				pgsize_bitmap;
 	u8				domain_bits;
+	u32				probe_size;
 };
 
 struct viommu_mapping {
@@ -77,8 +78,10 @@  struct viommu_domain {
 };
 
 struct viommu_endpoint {
+	struct device			*dev;
 	struct viommu_dev		*viommu;
 	struct viommu_domain		*vdomain;
+	struct list_head		resv_regions;
 };
 
 struct viommu_request {
@@ -129,6 +132,9 @@  static off_t viommu_get_req_offset(struct viommu_dev *viommu,
 {
 	size_t tail_size = sizeof(struct virtio_iommu_req_tail);
 
+	if (req->type == VIRTIO_IOMMU_T_PROBE)
+		return len - viommu->probe_size - tail_size;
+
 	return len - tail_size;
 }
 
@@ -414,6 +420,101 @@  static int viommu_replay_mappings(struct viommu_domain *vdomain)
 	return ret;
 }
 
+static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
+			       struct virtio_iommu_probe_resv_mem *mem,
+			       size_t len)
+{
+	struct iommu_resv_region *region = NULL;
+	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+	u64 start = le64_to_cpu(mem->start);
+	u64 end = le64_to_cpu(mem->end);
+	size_t size = end - start + 1;
+
+	if (len < sizeof(*mem))
+		return -EINVAL;
+
+	switch (mem->subtype) {
+	default:
+		dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
+			 mem->subtype);
+		/* Fall-through */
+	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
+		region = iommu_alloc_resv_region(start, size, 0,
+						 IOMMU_RESV_RESERVED);
+		break;
+	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
+		region = iommu_alloc_resv_region(start, size, prot,
+						 IOMMU_RESV_MSI);
+		break;
+	}
+
+	list_add(&vdev->resv_regions, &region->list);
+	return 0;
+}
+
+static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
+{
+	int ret;
+	u16 type, len;
+	size_t cur = 0;
+	size_t probe_len;
+	struct virtio_iommu_req_probe *probe;
+	struct virtio_iommu_probe_property *prop;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct viommu_endpoint *vdev = fwspec->iommu_priv;
+
+	if (!fwspec->num_ids)
+		return -EINVAL;
+
+	probe_len = sizeof(*probe) + viommu->probe_size +
+		    sizeof(struct virtio_iommu_req_tail);
+	probe = kzalloc(probe_len, GFP_KERNEL);
+	if (!probe)
+		return -ENOMEM;
+
+	probe->head.type = VIRTIO_IOMMU_T_PROBE;
+	/*
+	 * For now, assume that properties of an endpoint that outputs multiple
+	 * IDs are consistent. Only probe the first one.
+	 */
+	probe->endpoint = cpu_to_le32(fwspec->ids[0]);
+
+	ret = viommu_send_req_sync(viommu, probe, probe_len);
+	if (ret)
+		goto out_free;
+
+	prop = (void *)probe->properties;
+	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
+
+	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
+	       cur < viommu->probe_size) {
+		len = le16_to_cpu(prop->length) + sizeof(*prop);
+
+		switch (type) {
+		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
+			ret = viommu_add_resv_mem(vdev, (void *)prop, len);
+			break;
+		default:
+			dev_err(dev, "unknown viommu prop 0x%x\n", type);
+		}
+
+		if (ret)
+			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);
+
+		cur += len;
+		if (cur >= viommu->probe_size)
+			break;
+
+		prop = (void *)probe->properties + cur;
+		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
+	}
+
+out_free:
+	kfree(probe);
+	return ret;
+}
+
 /* IOMMU API */
 
 static struct iommu_domain *viommu_domain_alloc(unsigned type)
@@ -636,15 +737,33 @@  static void viommu_iotlb_sync(struct iommu_domain *domain)
 
 static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
 {
-	struct iommu_resv_region *region;
+	struct iommu_resv_region *entry, *new_entry, *msi = NULL;
+	struct viommu_endpoint *vdev = dev->iommu_fwspec->iommu_priv;
 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
 
-	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
-					 IOMMU_RESV_SW_MSI);
-	if (!region)
-		return;
+	list_for_each_entry(entry, &vdev->resv_regions, list) {
+		/*
+		 * If the device registered a bypass MSI windows, use it.
+		 * Otherwise add a software-mapped region
+		 */
+		if (entry->type == IOMMU_RESV_MSI)
+			msi = entry;
+
+		new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
+		if (!new_entry)
+			return;
+		list_add_tail(&new_entry->list, head);
+	}
+
+	if (!msi) {
+		msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
+					      prot, IOMMU_RESV_SW_MSI);
+		if (!msi)
+			return;
+
+		list_add_tail(&msi->list, head);
+	}
 
-	list_add_tail(&region->list, head);
 	iommu_dma_get_resv_regions(dev, head);
 }
 
@@ -692,9 +811,18 @@  static int viommu_add_device(struct device *dev)
 	if (!vdev)
 		return -ENOMEM;
 
+	vdev->dev = dev;
 	vdev->viommu = viommu;
+	INIT_LIST_HEAD(&vdev->resv_regions);
 	fwspec->iommu_priv = vdev;
 
+	if (viommu->probe_size) {
+		/* Get additional information for this endpoint */
+		ret = viommu_probe_endpoint(viommu, dev);
+		if (ret)
+			return ret;
+	}
+
 	ret = iommu_device_link(&viommu->iommu, dev);
 	if (ret)
 		goto err_free_dev;
@@ -717,6 +845,7 @@  static int viommu_add_device(struct device *dev)
 	iommu_device_unlink(&viommu->iommu, dev);
 
 err_free_dev:
+	viommu_put_resv_regions(dev, &vdev->resv_regions);
 	kfree(vdev);
 
 	return ret;
@@ -734,6 +863,7 @@  static void viommu_remove_device(struct device *dev)
 
 	iommu_group_remove_device(dev);
 	iommu_device_unlink(&vdev->viommu->iommu, dev);
+	viommu_put_resv_regions(dev, &vdev->resv_regions);
 	kfree(vdev);
 }
 
@@ -832,6 +962,10 @@  static int viommu_probe(struct virtio_device *vdev)
 			     struct virtio_iommu_config, domain_bits,
 			     &viommu->domain_bits);
 
+	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
+			     struct virtio_iommu_config, probe_size,
+			     &viommu->probe_size);
+
 	viommu->geometry = (struct iommu_domain_geometry) {
 		.aperture_start	= input_start,
 		.aperture_end	= input_end,
@@ -913,6 +1047,7 @@  static unsigned int features[] = {
 	VIRTIO_IOMMU_F_MAP_UNMAP,
 	VIRTIO_IOMMU_F_DOMAIN_BITS,
 	VIRTIO_IOMMU_F_INPUT_RANGE,
+	VIRTIO_IOMMU_F_PROBE,
 };
 
 static struct virtio_device_id id_table[] = {
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
index e808fc7fbe82..feed74586bb0 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -14,6 +14,7 @@ 
 #define VIRTIO_IOMMU_F_DOMAIN_BITS		1
 #define VIRTIO_IOMMU_F_MAP_UNMAP		2
 #define VIRTIO_IOMMU_F_BYPASS			3
+#define VIRTIO_IOMMU_F_PROBE			4
 
 struct virtio_iommu_config {
 	/* Supported page sizes */
@@ -25,6 +26,9 @@  struct virtio_iommu_config {
 	} input_range;
 	/* Max domain ID size */
 	__u8					domain_bits;
+	__u8					padding[3];
+	/* Probe buffer size */
+	__u32					probe_size;
 };
 
 /* Request types */
@@ -32,6 +36,7 @@  struct virtio_iommu_config {
 #define VIRTIO_IOMMU_T_DETACH			0x02
 #define VIRTIO_IOMMU_T_MAP			0x03
 #define VIRTIO_IOMMU_T_UNMAP			0x04
+#define VIRTIO_IOMMU_T_PROBE			0x05
 
 /* Status types */
 #define VIRTIO_IOMMU_S_OK			0x00
@@ -98,4 +103,38 @@  struct virtio_iommu_req_unmap {
 	struct virtio_iommu_req_tail		tail;
 };
 
+#define VIRTIO_IOMMU_PROBE_T_NONE		0
+#define VIRTIO_IOMMU_PROBE_T_RESV_MEM		1
+
+#define VIRTIO_IOMMU_PROBE_T_MASK		0xfff
+
+struct virtio_iommu_probe_property {
+	__le16					type;
+	__le16					length;
+};
+
+#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED	0
+#define VIRTIO_IOMMU_RESV_MEM_T_MSI		1
+
+struct virtio_iommu_probe_resv_mem {
+	struct virtio_iommu_probe_property	head;
+	__u8					subtype;
+	__u8					reserved[3];
+	__le64					start;
+	__le64					end;
+};
+
+struct virtio_iommu_req_probe {
+	struct virtio_iommu_req_head		head;
+	__le32					endpoint;
+	__u8					reserved[64];
+
+	__u8					properties[];
+
+	/*
+	 * Tail follows the variable-length properties array. No padding,
+	 * property lengths are all aligned on 8 bytes.
+	 */
+};
+
 #endif