@@ -45,6 +45,9 @@ struct iommufd_ctx {
struct mutex sw_msi_lock;
struct list_head sw_msi_list;
unsigned int sw_msi_id;
+ /* User-programmed SW_MSI region, to override igroup->sw_msi_start */
+ phys_addr_t sw_msi_start;
+ size_t sw_msi_size;
u8 account_mode;
/* Compatibility with VFIO no iommu */
@@ -281,6 +284,7 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
struct iommufd_ctx *ictx);
+int iommufd_option_sw_msi(struct iommu_option *cmd, struct iommufd_ctx *ictx);
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
int iommufd_check_iova_range(struct io_pagetable *iopt,
@@ -294,7 +294,9 @@ struct iommu_ioas_unmap {
/**
* enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
- * ioctl(IOMMU_OPTION_HUGE_PAGES)
+ * ioctl(IOMMU_OPTION_HUGE_PAGES) and
+ * ioctl(IOMMU_OPTION_SW_MSI_START) and
+ * ioctl(IOMMU_OPTION_SW_MSI_SIZE)
* @IOMMU_OPTION_RLIMIT_MODE:
* Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
* to invoke this. Value 0 (default) is user based accounting, 1 uses process
@@ -304,10 +306,24 @@ struct iommu_ioas_unmap {
* iommu mappings. Value 0 disables combining, everything is mapped to
* PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
* option, the object_id must be the IOAS ID.
+ * @IOMMU_OPTION_SW_MSI_START:
+ * Change the base address of the IOMMU mapping region for MSI doorbell(s).
+ * It must be set this before attaching a device to an IOAS/HWPT, otherwise
+ * this option will be not effective on that IOAS/HWPT. User can choose to
+ * let kernel pick a base address, by simply ignoring this option or setting
+ * a value 0 to IOMMU_OPTION_SW_MSI_SIZE. Global option, object_id must be 0
+ * @IOMMU_OPTION_SW_MSI_SIZE:
+ * Change the size of the IOMMU mapping region for MSI doorbell(s). It must
+ * be set this before attaching a device to an IOAS/HWPT, otherwise it won't
+ * be effective on that IOAS/HWPT. The value is in MB, and the minimum value
+ * is 1 MB. A value 0 (default) will invalidate the MSI doorbell base address
+ * value set to IOMMU_OPTION_SW_MSI_START. Global option, object_id must be 0
*/
enum iommufd_option {
IOMMU_OPTION_RLIMIT_MODE = 0,
IOMMU_OPTION_HUGE_PAGES = 1,
+ IOMMU_OPTION_SW_MSI_START = 2,
+ IOMMU_OPTION_SW_MSI_SIZE = 3,
};
/**
@@ -445,10 +445,14 @@ static int
iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
struct iommufd_hwpt_paging *hwpt_paging)
{
+ struct iommufd_ctx *ictx = idev->ictx;
int rc;
lockdep_assert_held(&idev->igroup->lock);
+ /* Override it with a user-programmed SW_MSI region */
+ if (ictx->sw_msi_size && ictx->sw_msi_start != PHYS_ADDR_MAX)
+ idev->igroup->sw_msi_start = ictx->sw_msi_start;
rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
idev->dev,
&idev->igroup->sw_msi_start);
@@ -1446,7 +1446,9 @@ int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
num_hw_msi++;
if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
- *sw_msi_start = resv->start;
+ /* Bypass the driver-defined SW_MSI region, if preset */
+ if (*sw_msi_start == PHYS_ADDR_MAX)
+ *sw_msi_start = resv->start;
num_sw_msi++;
}
@@ -620,6 +620,40 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd,
return -EOPNOTSUPP;
}
+int iommufd_option_sw_msi(struct iommu_option *cmd, struct iommufd_ctx *ictx)
+{
+ if (cmd->object_id)
+ return -EOPNOTSUPP;
+
+ if (cmd->op == IOMMU_OPTION_OP_GET) {
+ switch (cmd->option_id) {
+ case IOMMU_OPTION_SW_MSI_START:
+ cmd->val64 = (u64)ictx->sw_msi_start;
+ break;
+ case IOMMU_OPTION_SW_MSI_SIZE:
+ cmd->val64 = (u64)ictx->sw_msi_size;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+ }
+ if (cmd->op == IOMMU_OPTION_OP_SET) {
+ switch (cmd->option_id) {
+ case IOMMU_OPTION_SW_MSI_START:
+ ictx->sw_msi_start = (phys_addr_t)cmd->val64;
+ break;
+ case IOMMU_OPTION_SW_MSI_SIZE:
+ ictx->sw_msi_size = (size_t)cmd->val64;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+ }
+ return -EOPNOTSUPP;
+}
+
static int iommufd_ioas_option_huge_pages(struct iommu_option *cmd,
struct iommufd_ioas *ioas)
{
@@ -229,6 +229,8 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
init_waitqueue_head(&ictx->destroy_wait);
mutex_init(&ictx->sw_msi_lock);
INIT_LIST_HEAD(&ictx->sw_msi_list);
+ ictx->sw_msi_start = PHYS_ADDR_MAX;
+ ictx->sw_msi_size = 0;
filp->private_data = ictx;
return 0;
}
@@ -287,6 +289,10 @@ static int iommufd_option(struct iommufd_ucmd *ucmd)
case IOMMU_OPTION_RLIMIT_MODE:
rc = iommufd_option_rlimit_mode(cmd, ucmd->ictx);
break;
+ case IOMMU_OPTION_SW_MSI_START:
+ case IOMMU_OPTION_SW_MSI_SIZE:
+ rc = iommufd_option_sw_msi(cmd, ucmd->ictx);
+ break;
case IOMMU_OPTION_HUGE_PAGES:
rc = iommufd_ioas_option(ucmd);
break;
For systems that require MSI pages to be mapped into the IOMMU translation the IOMMU driver provides an IOMMU_RESV_SW_MSI range, which is the default recommended IOVA window to place these mappings. However, there is nothing special about this address. And to support the RMR trick in VMM for nested translation, the VMM needs to know what sw_msi window the kernel is using. As there is no particular reason to force VMM to adopt the kernel default, provide a simple IOMMU_OPTION_SW_MSI_START/SIZE ioctl that the VMM can use to directly specify the sw_msi window that it wants to use, which replaces and disables the default IOMMU_RESV_SW_MSI from the driver to avoid having to build an API to discover the default IOMMU_RESV_SW_MSI. Since iommufd now has its own sw_msi function, this is easy to implement. To keep things simple, the parameters are global to the entire iommufd FD, and will directly replace the IOMMU_RESV_SW_MSI values. The VMM must set the values before creating any hwpt's to have any effect. Suggested-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> --- drivers/iommu/iommufd/iommufd_private.h | 4 +++ include/uapi/linux/iommufd.h | 18 ++++++++++++- drivers/iommu/iommufd/device.c | 4 +++ drivers/iommu/iommufd/io_pagetable.c | 4 ++- drivers/iommu/iommufd/ioas.c | 34 +++++++++++++++++++++++++ drivers/iommu/iommufd/main.c | 6 +++++ 6 files changed, 68 insertions(+), 2 deletions(-)