diff mbox series

[rdma-next,3/3] RDMA/mlx5: Expose RDMA TRANSPORT flow table types to userspace

Message ID 2746a9a8b04d8d6b4fd46b7629daf481aebb2619.1735817449.git.leon@kernel.org (mailing list archive)
State New
Headers show
Series Add RDMA TRANSPORT steering domain | expand

Commit Message

Leon Romanovsky Jan. 2, 2025, 11:36 a.m. UTC
From: Patrisious Haddad <phaddad@nvidia.com>

Add RDMA_TRANSPORT_RX and RDMA_TRANSPORT_TX as new flow table
types for matcher creation.

Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/fs.c           | 140 ++++++++++++++++++++--
 drivers/infiniband/hw/mlx5/fs.h           |   2 +
 drivers/infiniband/hw/mlx5/mlx5_ib.h      |   3 +
 include/uapi/rdma/mlx5_user_ioctl_cmds.h  |   1 +
 include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
 5 files changed, 135 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 520034acf73aa..2d1430673238e 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -690,7 +690,7 @@  static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
 					   struct mlx5_ib_flow_prio *prio,
 					   int priority,
 					   int num_entries, int num_groups,
-					   u32 flags)
+					   u32 flags, u16 vport)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_flow_table *ft;
@@ -698,6 +698,7 @@  static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
 	ft_attr.prio = priority;
 	ft_attr.max_fte = num_entries;
 	ft_attr.flags = flags;
+	ft_attr.vport = vport;
 	ft_attr.autogroup.max_num_groups = num_groups;
 	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
 	if (IS_ERR(ft))
@@ -792,7 +793,7 @@  static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 	ft = prio->flow_table;
 	if (!ft)
 		return _get_prio(dev, ns, prio, priority, max_table_size,
-				 num_groups, flags);
+				 num_groups, flags, 0);
 
 	return prio;
 }
@@ -935,7 +936,7 @@  int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
 	prio = &dev->flow_db->opfcs[type];
 	if (!prio->flow_table) {
 		prio = _get_prio(dev, ns, prio, priority,
-				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
+				 dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
 		if (IS_ERR(prio)) {
 			err = PTR_ERR(prio);
 			goto free;
@@ -1413,17 +1414,51 @@  static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 	return ERR_PTR(err);
 }
 
+static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
+					  enum mlx5_flow_namespace_type type,
+					  u32 *flags, u16 *vport_idx,
+					  u16 *vport,
+					  struct mlx5_core_dev **ft_mdev,
+					  u32 ib_port)
+{
+	struct mlx5_core_dev *esw_mdev;
+
+	if (!is_mdev_switchdev_mode(dev->mdev))
+		return 0;
+
+	if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
+		return -EOPNOTSUPP;
+
+	if (!dev->port[ib_port - 1].rep)
+		return -EINVAL;
+
+	esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
+	if (esw_mdev != dev->mdev)
+		return -EOPNOTSUPP;
+
+	*flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
+	*ft_mdev = esw_mdev;
+	*vport = dev->port[ib_port - 1].rep->vport;
+	*vport_idx = dev->port[ib_port - 1].rep->vport_index;
+
+	return 0;
+}
+
 static struct mlx5_ib_flow_prio *
 _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
 		enum mlx5_flow_namespace_type ns_type,
-		bool mcast)
+		bool mcast, u32 ib_port)
 {
+	struct mlx5_core_dev *ft_mdev = dev->mdev;
 	struct mlx5_flow_namespace *ns = NULL;
 	struct mlx5_ib_flow_prio *prio = NULL;
 	int max_table_size = 0;
+	u16 vport_idx = 0;
 	bool esw_encap;
 	u32 flags = 0;
+	u16 vport = 0;
 	int priority;
+	int ret;
 
 	if (mcast)
 		priority = MLX5_IB_FLOW_MCAST_PRIO;
@@ -1471,13 +1506,38 @@  _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
 			MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
 		priority = user_priority;
 		break;
+	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+		if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
+			return ERR_PTR(-EINVAL);
+		ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
+						     &vport_idx, &vport,
+						     &ft_mdev, ib_port);
+		if (ret)
+			return ERR_PTR(ret);
+
+		if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
+			max_table_size =
+				BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
+					ft_mdev, log_max_ft_size));
+		else
+			max_table_size =
+				BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
+					ft_mdev, log_max_ft_size));
+		priority = user_priority;
+		break;
 	default:
 		break;
 	}
 
 	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
 
-	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+	if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
+	    ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
+		ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
+	else
+		ns = mlx5_get_flow_namespace(ft_mdev, ns_type);
+
 	if (!ns)
 		return ERR_PTR(-EOPNOTSUPP);
 
@@ -1497,6 +1557,12 @@  _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
 	case MLX5_FLOW_NAMESPACE_RDMA_TX:
 		prio = &dev->flow_db->rdma_tx[priority];
 		break;
+	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+		prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
+		break;
+	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+		prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
+		break;
 	default: return ERR_PTR(-EINVAL);
 	}
 
@@ -1507,7 +1573,7 @@  _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
 		return prio;
 
 	return _get_prio(dev, ns, prio, priority, max_table_size,
-			 MLX5_FS_MAX_TYPES, flags);
+			 MLX5_FS_MAX_TYPES, flags, vport);
 }
 
 static struct mlx5_ib_flow_handler *
@@ -1626,7 +1692,8 @@  static struct mlx5_ib_flow_handler *raw_fs_rule_add(
 	mutex_lock(&dev->flow_db->lock);
 
 	ft_prio = _get_flow_table(dev, fs_matcher->priority,
-				  fs_matcher->ns_type, mcast);
+				  fs_matcher->ns_type, mcast,
+				  fs_matcher->ib_port);
 	if (IS_ERR(ft_prio)) {
 		err = PTR_ERR(ft_prio);
 		goto unlock;
@@ -1738,6 +1805,12 @@  mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
 		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
 		break;
+	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
+		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
+		break;
+	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
+		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1827,7 +1900,8 @@  static int get_dests(struct uverbs_attr_bundle *attrs,
 		return -EINVAL;
 
 	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
-	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
 	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
 		return -EINVAL;
 
@@ -1844,7 +1918,8 @@  static int get_dests(struct uverbs_attr_bundle *attrs,
 			return -EINVAL;
 		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
 		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
-		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
 		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
 			return -EINVAL;
 	} else if (dest_qp) {
@@ -1865,14 +1940,16 @@  static int get_dests(struct uverbs_attr_bundle *attrs,
 			*dest_id = mqp->raw_packet_qp.rq.tirn;
 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
-		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
+		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
 		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
 	}
 
 	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
 	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
-	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
+	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
 		return -EINVAL;
 
 	return 0;
@@ -2386,6 +2463,22 @@  static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
 		goto end;
 	}
 
+	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
+		err = uverbs_copy_from(&obj->ib_port, attrs,
+				       MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
+		if (err)
+			goto end;
+		if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
+			err = -EINVAL;
+			goto end;
+		}
+		if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
+		    obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
+			err = -EINVAL;
+			goto end;
+		}
+	}
+
 	uobj->object = obj;
 	obj->mdev = dev->mdev;
 	atomic_set(&obj->usecnt, 0);
@@ -2433,7 +2526,7 @@  static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
 
 	mutex_lock(&dev->flow_db->lock);
 
-	ft_prio = _get_flow_table(dev, priority, ns_type, 0);
+	ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
 	if (IS_ERR(ft_prio)) {
 		err = PTR_ERR(ft_prio);
 		goto free_obj;
@@ -2819,7 +2912,10 @@  DECLARE_UVERBS_NAMED_METHOD(
 			     UA_OPTIONAL),
 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
 			     enum mlx5_ib_uapi_flow_table_type,
-			     UA_OPTIONAL));
+			     UA_OPTIONAL),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_OPTIONAL));
 
 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -2889,8 +2985,26 @@  int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
 	if (!dev->flow_db)
 		return -ENOMEM;
 
+	dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
+					sizeof(struct mlx5_ib_flow_prio),
+					GFP_KERNEL);
+	if (!dev->flow_db->rdma_transport_rx)
+		goto free_flow_db;
+
+	dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
+					sizeof(struct mlx5_ib_flow_prio),
+					GFP_KERNEL);
+	if (!dev->flow_db->rdma_transport_tx)
+		goto free_rdma_transport_rx;
+
 	mutex_init(&dev->flow_db->lock);
 
 	ib_set_device_ops(&dev->ib_dev, &flow_ops);
 	return 0;
+
+free_rdma_transport_rx:
+	kfree(dev->flow_db->rdma_transport_rx);
+free_flow_db:
+	kfree(dev->flow_db);
+	return -ENOMEM;
 }
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
index b9734904f5f01..0516555eb1c17 100644
--- a/drivers/infiniband/hw/mlx5/fs.h
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -40,6 +40,8 @@  static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
 	 * is a safe assumption that all references are gone.
 	 */
 	mlx5_ib_fs_cleanup_anchor(dev);
+	kfree(dev->flow_db->rdma_transport_tx);
+	kfree(dev->flow_db->rdma_transport_rx);
 	kfree(dev->flow_db);
 }
 #endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6b6e8ca2f9070..a0138bdfa3894 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -276,6 +276,7 @@  struct mlx5_ib_flow_matcher {
 	struct mlx5_core_dev	*mdev;
 	atomic_t		usecnt;
 	u8			match_criteria_enable;
+	u32			ib_port;
 };
 
 struct mlx5_ib_steering_anchor {
@@ -307,6 +308,8 @@  struct mlx5_ib_flow_db {
 	struct mlx5_ib_flow_prio	rdma_tx[MLX5_IB_NUM_FLOW_FT];
 	struct mlx5_ib_flow_prio	opfcs[MLX5_IB_OPCOUNTER_MAX];
 	struct mlx5_flow_table		*lag_demux_ft;
+	struct mlx5_ib_flow_prio        *rdma_transport_rx;
+	struct mlx5_ib_flow_prio        *rdma_transport_tx;
 	/* Protect flow steering bypass flow tables
 	 * when add/del flow rules.
 	 * only single add/removal of flow steering rule could be done
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index fd2e4a3a56b36..18f9fe0702132 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -239,6 +239,7 @@  enum mlx5_ib_flow_matcher_create_attrs {
 	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
 	MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
 	MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
+	MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
 };
 
 enum mlx5_ib_flow_matcher_destroy_attrs {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index 7c233df475e71..8f86e79d78a5f 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -45,6 +45,8 @@  enum mlx5_ib_uapi_flow_table_type {
 	MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB	= 0x2,
 	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX	= 0x3,
 	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX	= 0x4,
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX	= 0x5,
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX	= 0x6,
 };
 
 enum mlx5_ib_uapi_flow_action_packet_reformat_type {