
[net-next,06/14] net/mlx5: qos: Always create group0

Message ID 20241008183222.137702-7-tariqt@nvidia.com (mailing list archive)
State Accepted
Commit a87a561b802a45d37bc34e5a8e4f57a213ea713f
Delegated to: Netdev Maintainers
Series net/mlx5: qos: Refactor esw qos to support new features

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 6 this patch: 6
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: linux-rdma@vger.kernel.org
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5 this patch: 5
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-10-10--09-00 (tests: 775)

Commit Message

Tariq Toukan Oct. 8, 2024, 6:32 p.m. UTC
From: Cosmin Ratiu <cratiu@nvidia.com>

All vports with QoS enabled that are not explicitly members of a group
are part of the internal esw group0, except when the HW reports that
scheduling groups aren't supported (log_esw_max_sched_depth == 0). This
creates corner cases in the code, which must explicitly handle a NULL
group0. Additionally, the groups are about to be moved out of the
eswitch struct, and a NULL group0 creates further complications there.

This patch makes sure that group0 is always created, even when the max
sched depth is 0. In that case, a software-only group0 is created,
referencing the root TSAR. Vports can point to this group when their
QoS is enabled, and they are then attached to the root TSAR directly.
This eliminates the corner cases in the code by guaranteeing that, if
QoS is enabled, vport->qos.group is non-NULL.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 36 +++++++++++--------
 .../net/ethernet/mellanox/mlx5/core/eswitch.h | 12 ++++---
 2 files changed, 30 insertions(+), 18 deletions(-)

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index cfff1413dcfc..958b8894f5c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -113,7 +113,7 @@  static u32 esw_qos_calculate_group_min_rate_divider(struct mlx5_eswitch *esw,
 	/* If vports max min_rate divider is 0 but their group has bw_share
 	 * configured, then set bw_share for vports to minimal value.
 	 */
-	if (group && group->bw_share)
+	if (group->bw_share)
 		return 1;
 
 	/* A divider of 0 sets bw_share for all group vports to 0,
@@ -132,7 +132,7 @@  static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw)
 	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
 	 */
 	list_for_each_entry(group, &esw->qos.groups, list) {
-		if (group->min_rate < max_guarantee)
+		if (group->min_rate < max_guarantee || group->tsar_ix == esw->qos.root_tsar_ix)
 			continue;
 		max_guarantee = group->min_rate;
 	}
@@ -188,6 +188,8 @@  static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_e
 	int err;
 
 	list_for_each_entry(group, &esw->qos.groups, list) {
+		if (group->tsar_ix == esw->qos.root_tsar_ix)
+			continue;
 		bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
 
 		if (bw_share == group->bw_share)
@@ -252,7 +254,7 @@  static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vpor
 		return 0;
 
 	/* Use parent group limit if new max rate is 0. */
-	if (vport->qos.group && !max_rate)
+	if (!max_rate)
 		act_max_rate = vport->qos.group->max_rate;
 
 	err = esw_qos_vport_config(esw, vport, act_max_rate, vport->qos.bw_share, extack);
@@ -348,19 +350,17 @@  static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
 	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
 	struct mlx5_esw_rate_group *group = vport->qos.group;
 	struct mlx5_core_dev *dev = esw->dev;
-	u32 parent_tsar_ix;
 	void *attr;
 	int err;
 
 	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
 		return -EOPNOTSUPP;
 
-	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
 	MLX5_SET(scheduling_context, sched_ctx, element_type,
 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
 	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
 	MLX5_SET(vport_element, attr, vport_number, vport->vport);
-	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
+	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, group->tsar_ix);
 	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
 	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
 
@@ -605,12 +605,17 @@  static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
 	INIT_LIST_HEAD(&esw->qos.groups);
 	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
 		esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
-		if (IS_ERR(esw->qos.group0)) {
-			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
-				 PTR_ERR(esw->qos.group0));
-			err = PTR_ERR(esw->qos.group0);
-			goto err_group0;
-		}
+	} else {
+		/* The eswitch doesn't support scheduling groups.
+		 * Create a software-only group0 using the root TSAR to attach vport QoS to.
+		 */
+		if (!__esw_qos_alloc_rate_group(esw, esw->qos.root_tsar_ix))
+			esw->qos.group0 = ERR_PTR(-ENOMEM);
+	}
+	if (IS_ERR(esw->qos.group0)) {
+		err = PTR_ERR(esw->qos.group0);
+		esw_warn(dev, "E-Switch create rate group 0 failed (%d)\n", err);
+		goto err_group0;
 	}
 	refcount_set(&esw->qos.refcnt, 1);
 
@@ -628,8 +633,11 @@  static void esw_qos_destroy(struct mlx5_eswitch *esw)
 {
 	int err;
 
-	if (esw->qos.group0)
+	if (esw->qos.group0->tsar_ix != esw->qos.root_tsar_ix)
 		__esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
+	else
+		__esw_qos_free_rate_group(esw->qos.group0);
+	esw->qos.group0 = NULL;
 
 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
 						  SCHEDULING_HIERARCHY_E_SWITCH,
@@ -699,7 +707,7 @@  void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo
 	lockdep_assert_held(&esw->state_lock);
 	if (!vport->qos.enabled)
 		return;
-	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
+	WARN(vport->qos.group != esw->qos.group0,
 	     "Disabling QoS on port before detaching it from group");
 
 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index f208ae16bfd2..fec9e843f673 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -213,6 +213,7 @@  struct mlx5_vport {
 	struct mlx5_vport_info  info;
 
 	struct {
+		/* Initially false, set to true whenever any QoS features are used. */
 		bool enabled;
 		u32 esw_sched_elem_ix;
 		u32 min_rate;
@@ -362,14 +363,17 @@  struct mlx5_eswitch {
 	atomic64_t user_count;
 
 	struct {
-		u32             root_tsar_ix;
-		struct mlx5_esw_rate_group *group0;
-		struct list_head groups; /* Protected by esw->state_lock */
-
 		/* Protected by esw->state_lock.
 		 * Initially 0, meaning no QoS users and QoS is disabled.
 		 */
 		refcount_t refcnt;
+		u32 root_tsar_ix;
+		/* Contains all vports with QoS enabled but no explicit group.
+		 * Cannot be NULL if QoS is enabled, but may be a fake group
+		 * referencing the root TSAR if the esw doesn't support groups.
+		 */
+		struct mlx5_esw_rate_group *group0;
+		struct list_head groups; /* Protected by esw->state_lock */
 	} qos;
 
 	struct mlx5_esw_bridge_offloads *br_offloads;
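
For context on the esw_qos_create() hunk: __esw_qos_alloc_rate_group()
and __esw_qos_free_rate_group() are introduced earlier in this series
and are not shown in this patch (how the allocated group ends up in
esw->qos.group0 on success is likewise not visible here). Below is a
minimal sketch of what such a software-only alloc/free pair could look
like, assuming only the fields this patch already uses (tsar_ix and the
list linkage into esw->qos.groups); it is illustrative, not the series'
actual implementation:

static struct mlx5_esw_rate_group *
__esw_qos_alloc_rate_group(struct mlx5_eswitch *esw, u32 tsar_ix)
{
	struct mlx5_esw_rate_group *group;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return NULL;

	/* Record the TSAR index this group represents. For the
	 * software-only group0 this is the root TSAR, so no firmware
	 * scheduling element is created for the group itself.
	 */
	group->tsar_ix = tsar_ix;
	list_add_tail(&group->list, &esw->qos.groups);
	return group;
}

static void __esw_qos_free_rate_group(struct mlx5_esw_rate_group *group)
{
	list_del(&group->list);
	kfree(group);
}

This shape is consistent with how esw_qos_destroy() distinguishes the
two cases by comparing group0->tsar_ix against root_tsar_ix: a real
group owns a firmware TSAR and is torn down via
__esw_qos_destroy_rate_group(), while the software-only group0 borrows
the root TSAR and is simply freed.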