
[net-next,2/6] net/mlx5: CT: Separate CT and CT-NAT tuple entries

Message ID 20240613210036.1125203-3-tariqt@nvidia.com
State Accepted
Commit 49d37d05f216ce026ebd297b76a2db5bff7a4b4d
Delegated to: Netdev Maintainers
Series mlx5 misc patches 2024-06-13

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 845 this patch: 845
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: linux-rdma@vger.kernel.org
netdev/build_clang success Errors and warnings before: 849 this patch: 849
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 849 this patch: 849
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns; WARNING: line length of 82 exceeds 80 columns; WARNING: line length of 84 exceeds 80 columns; WARNING: line length of 85 exceeds 80 columns; WARNING: line length of 87 exceeds 80 columns; WARNING: line length of 92 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-06-14--21-00 (tests: 650)

Commit Message

Tariq Toukan June 13, 2024, 9 p.m. UTC
From: Chris Mi <cmi@nvidia.com>

Currently, a ct entry is stored in both the ct and ct-nat tables. The
ct action is directed to the ct table, while the ct nat action is
directed to the ct-nat table. ct-nat entries perform the NAT header
rewrites, if required. The current design ensures that a ct action
will match in hardware even if the tuple has NAT configured; it just
won't execute the rewrite. However, storing each connection in two
tables increases the system's memory consumption while reducing its
insertion rate.

Offload a connection to either the ct table or the ct-nat table, not
both. Add a miss fall-through rule from the ct-nat table to the ct
table, allowing a ct(nat) action to match non-natted connections.

A ct action on natted connections will, by default, be handled by the
software miss path. A condensed sketch of the resulting placement
policy follows the diffstat below.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Chris Mi <cmi@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/tc_ct.c    | 187 +++++++++++++-----
 1 file changed, 143 insertions(+), 44 deletions(-)
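
For readers skimming the diff, the placement policy this patch
introduces reduces to the sketch below. This is illustrative only:
insert_ct() and insert_ct_nat() are hypothetical stand-ins for the
rhashtable insertions plus rule offload, while the natted test mirrors
the memcmp() of the pre- and post-NAT tuples that the patch performs
in mlx5_tc_ct_block_flow_offload_add().

/* Illustrative sketch, not part of the patch. */
static int offload_ct_entry(struct mlx5_ct_entry *entry)
{
	/* A connection is natted iff its post-NAT tuple differs
	 * from the original tuple.
	 */
	bool natted = memcmp(&entry->tuple, &entry->tuple_nat,
			     sizeof(entry->tuple)) != 0;

	if (natted) {
		/* Natted connections now live only in the ct-nat table. */
		set_bit(MLX5_CT_ENTRY_IN_CT_NAT_TABLE, &entry->flags);
		return insert_ct_nat(entry);	/* hypothetical helper */
	}

	/* All other connections live only in the ct table. A ct(nat)
	 * action on such a connection misses in ct-nat and falls
	 * through to the ct table via the new miss rule.
	 */
	set_bit(MLX5_CT_ENTRY_IN_CT_TABLE, &entry->flags);
	return insert_ct(entry);		/* hypothetical helper */
}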

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index fadfa8b50beb..b49d87a51f21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -69,6 +69,8 @@  struct mlx5_tc_ct_priv {
 	struct rhashtable ct_tuples_nat_ht;
 	struct mlx5_flow_table *ct;
 	struct mlx5_flow_table *ct_nat;
+	struct mlx5_flow_group *ct_nat_miss_group;
+	struct mlx5_flow_handle *ct_nat_miss_rule;
 	struct mlx5e_post_act *post_act;
 	struct mutex control_lock; /* guards parallel adds/dels */
 	struct mapping_ctx *zone_mapping;
@@ -141,6 +143,8 @@  struct mlx5_ct_counter {
 
 enum {
 	MLX5_CT_ENTRY_FLAG_VALID,
+	MLX5_CT_ENTRY_IN_CT_TABLE,
+	MLX5_CT_ENTRY_IN_CT_NAT_TABLE,
 };
 
 struct mlx5_ct_entry {
@@ -198,9 +202,15 @@  static const struct rhashtable_params tuples_nat_ht_params = {
 };
 
 static bool
-mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
+mlx5_tc_ct_entry_in_ct_table(struct mlx5_ct_entry *entry)
 {
-	return !!(entry->tuple_nat_node.next);
+	return test_bit(MLX5_CT_ENTRY_IN_CT_TABLE, &entry->flags);
+}
+
+static bool
+mlx5_tc_ct_entry_in_ct_nat_table(struct mlx5_ct_entry *entry)
+{
+	return test_bit(MLX5_CT_ENTRY_IN_CT_NAT_TABLE, &entry->flags);
 }
 
 static int
@@ -526,8 +536,10 @@  static void
 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
 			   struct mlx5_ct_entry *entry)
 {
-	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
-	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+	if (mlx5_tc_ct_entry_in_ct_nat_table(entry))
+		mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
+	if (mlx5_tc_ct_entry_in_ct_table(entry))
+		mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
 
 	atomic_dec(&ct_priv->debugfs.stats.offloaded);
 }
@@ -814,7 +826,7 @@  mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
 					      &zone_rule->mh,
 					      zone_restore_id,
 					      nat,
-					      mlx5_tc_ct_entry_has_nat(entry));
+					      mlx5_tc_ct_entry_in_ct_nat_table(entry));
 	if (err) {
 		ct_dbg("Failed to create ct entry mod hdr");
 		goto err_mod_hdr;
@@ -888,7 +900,7 @@  mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv,
 	*old_attr = *attr;
 
 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &mh, zone_restore_id,
-					      nat, mlx5_tc_ct_entry_has_nat(entry));
+					      nat, mlx5_tc_ct_entry_in_ct_nat_table(entry));
 	if (err) {
 		ct_dbg("Failed to create ct entry mod hdr");
 		goto err_mod_hdr;
@@ -957,11 +969,13 @@  static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
 {
 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
 
-	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
-			       &entry->tuple_nat_node,
-			       tuples_nat_ht_params);
-	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
-			       tuples_ht_params);
+	if (mlx5_tc_ct_entry_in_ct_nat_table(entry))
+		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+				       &entry->tuple_nat_node,
+				       tuples_nat_ht_params);
+	if (mlx5_tc_ct_entry_in_ct_table(entry))
+		rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
+				       tuples_ht_params);
 }
 
 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
@@ -1100,21 +1114,26 @@  mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
 		return err;
 	}
 
-	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
-					zone_restore_id);
-	if (err)
-		goto err_orig;
+	if (mlx5_tc_ct_entry_in_ct_table(entry)) {
+		err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
+						zone_restore_id);
+		if (err)
+			goto err_orig;
+	}
 
-	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
-					zone_restore_id);
-	if (err)
-		goto err_nat;
+	if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) {
+		err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
+						zone_restore_id);
+		if (err)
+			goto err_nat;
+	}
 
 	atomic_inc(&ct_priv->debugfs.stats.offloaded);
 	return 0;
 
 err_nat:
-	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+	if (mlx5_tc_ct_entry_in_ct_table(entry))
+		mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
 err_orig:
 	mlx5_tc_ct_counter_put(ct_priv, entry);
 	return err;
@@ -1128,15 +1147,19 @@  mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv,
 {
 	int err;
 
-	err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false,
-					    zone_restore_id);
-	if (err)
-		return err;
+	if (mlx5_tc_ct_entry_in_ct_table(entry)) {
+		err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false,
+						    zone_restore_id);
+		if (err)
+			return err;
+	}
 
-	err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, true,
-					    zone_restore_id);
-	if (err)
-		mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+	if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) {
+		err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, true,
+						    zone_restore_id);
+		if (err && mlx5_tc_ct_entry_in_ct_table(entry))
+			mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+	}
 	return err;
 }
 
@@ -1224,18 +1247,24 @@  mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
 	if (err)
 		goto err_entries;
 
-	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
-					    &entry->tuple_node,
-					    tuples_ht_params);
-	if (err)
-		goto err_tuple;
-
 	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
 		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
 						    &entry->tuple_nat_node,
 						    tuples_nat_ht_params);
 		if (err)
 			goto err_tuple_nat;
+
+		set_bit(MLX5_CT_ENTRY_IN_CT_NAT_TABLE, &entry->flags);
+	}
+
+	if (!mlx5_tc_ct_entry_in_ct_nat_table(entry)) {
+		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
+						    &entry->tuple_node,
+						    tuples_ht_params);
+		if (err)
+			goto err_tuple;
+
+		set_bit(MLX5_CT_ENTRY_IN_CT_TABLE, &entry->flags);
 	}
 	spin_unlock_bh(&ct_priv->ht_lock);
 
@@ -1251,17 +1280,10 @@  mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
 
 err_rules:
 	spin_lock_bh(&ct_priv->ht_lock);
-	if (mlx5_tc_ct_entry_has_nat(entry))
-		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
-				       &entry->tuple_nat_node, tuples_nat_ht_params);
-err_tuple_nat:
-	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
-			       &entry->tuple_node,
-			       tuples_ht_params);
 err_tuple:
-	rhashtable_remove_fast(&ft->ct_entries_ht,
-			       &entry->node,
-			       cts_ht_params);
+	mlx5_tc_ct_entry_remove_from_tuples(entry);
+err_tuple_nat:
+	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
 err_entries:
 	spin_unlock_bh(&ct_priv->ht_lock);
 err_set:
@@ -2149,6 +2171,76 @@  mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
 	debugfs_remove_recursive(ct_priv->debugfs.root);
 }
 
+static struct mlx5_flow_handle *
+tc_ct_add_miss_rule(struct mlx5_flow_table *ft,
+		    struct mlx5_flow_table *next_ft)
+{
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act act = {};
+
+	act.flags  = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
+	act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	dest.type  = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = next_ft;
+
+	return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1);
+}
+
+static int
+tc_ct_add_ct_table_miss_rule(struct mlx5_flow_table *from,
+			     struct mlx5_flow_table *to,
+			     struct mlx5_flow_group **miss_group,
+			     struct mlx5_flow_handle **miss_rule)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *group;
+	struct mlx5_flow_handle *rule;
+	unsigned int max_fte = from->max_fte;
+	u32 *flow_group_in;
+	int err = 0;
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	/* create miss group */
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
+		 max_fte - 2);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+		 max_fte - 1);
+	group = mlx5_create_flow_group(from, flow_group_in);
+	if (IS_ERR(group)) {
+		err = PTR_ERR(group);
+		goto err_miss_grp;
+	}
+
+	/* add miss rule to next fdb */
+	rule = tc_ct_add_miss_rule(from, to);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		goto err_miss_rule;
+	}
+
+	*miss_group = group;
+	*miss_rule = rule;
+	kvfree(flow_group_in);
+	return 0;
+
+err_miss_rule:
+	mlx5_destroy_flow_group(group);
+err_miss_grp:
+	kvfree(flow_group_in);
+	return err;
+}
+
+static void
+tc_ct_del_ct_table_miss_rule(struct mlx5_flow_group *miss_group,
+			     struct mlx5_flow_handle *miss_rule)
+{
+	mlx5_del_flow_rules(miss_rule);
+	mlx5_destroy_flow_group(miss_group);
+}
+
 #define INIT_ERR_PREFIX "tc ct offload init failed"
 
 struct mlx5_tc_ct_priv *
@@ -2212,6 +2304,12 @@  mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
 		goto err_ct_nat_tbl;
 	}
 
+	err = tc_ct_add_ct_table_miss_rule(ct_priv->ct_nat, ct_priv->ct,
+					   &ct_priv->ct_nat_miss_group,
+					   &ct_priv->ct_nat_miss_rule);
+	if (err)
+		goto err_ct_zone_ht;
+
 	ct_priv->post_act = post_act;
 	mutex_init(&ct_priv->control_lock);
 	if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
@@ -2273,6 +2371,7 @@  mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
 	ct_priv->fs_ops->destroy(ct_priv->fs);
 	kfree(ct_priv->fs);
 
+	tc_ct_del_ct_table_miss_rule(ct_priv->ct_nat_miss_group, ct_priv->ct_nat_miss_rule);
 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
 	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
 	mapping_destroy(ct_priv->zone_mapping);
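
After this change, the hardware lookup for a ct(nat) action proceeds
roughly as follows (a schematic summary of the commit message, not
literal driver behavior):

	packet -> ct-nat table
	    hit  -> execute the NAT header rewrite, continue
	    miss -> miss rule (FWD to ct table, FLOW_ACT_IGNORE_FLOW_LEVEL)
	        hit  -> continue without a rewrite (non-natted connection)
	        miss -> software miss path

The FLOW_ACT_IGNORE_FLOW_LEVEL flag on the miss rule is what permits
forwarding to a table at the same flow level, which the steering core
would otherwise reject. A plain ct action on a natted connection no
longer matches in hardware and is, as the commit message notes,
handled by the software miss path.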