diff mbox series

[RFC,net-next,v2,2/2] net: flow_offload: add action stats api

Message ID 20221003061743.28171-3-ozsh@nvidia.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series net: flow_offload: add support for per action hw stats | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1761 this patch: 1773
netdev/cc_maintainers warning 5 maintainers not CCed: kuba@kernel.org davem@davemloft.net xiyou.wangcong@gmail.com edumazet@google.com pabeni@redhat.com
netdev/build_clang fail Errors and warnings before: 168 this patch: 170
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1910 this patch: 1910
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Oz Shlomo Oct. 3, 2022, 6:17 a.m. UTC
The current offload api provides visibility to flow hw stats.
This works as long as the flow stats values apply to all the flow's
actions. However, this assumption breaks when an action, such as police,
drops or jumps over other actions.

Extend the flow_offload api to return stat record per action instance.
Use the specific action, identified with an action cookie, stats value
when updating the action's hardware stats.

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
---
 include/net/flow_offload.h | 10 ++++++++++
 include/net/pkt_cls.h      | 18 ++++++++++++++++--
 net/sched/cls_api.c        |  1 +
 net/sched/cls_flower.c     |  3 ++-
 net/sched/cls_matchall.c   |  2 +-
 5 files changed, 30 insertions(+), 4 deletions(-)

Comments

Oz Shlomo Oct. 4, 2022, 8:22 a.m. UTC | #1
On 10/3/2022 9:17 AM, Oz Shlomo wrote:
> The current offload api provides visibility to flow hw stats.
> This works as long as the flow stats values apply to all the flow's
> actions. However, this assumption breaks when an action, such as police,
> drops or jumps over other actions.
> 
> Extend the flow_offload api to return stat record per action instance.
> Use the specific action, identified with an action cookie, stats value
> when updating the action's hardware stats.
> 
> Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
> ---
>   include/net/flow_offload.h | 10 ++++++++++
>   include/net/pkt_cls.h      | 18 ++++++++++++++++--
>   net/sched/cls_api.c        |  1 +
>   net/sched/cls_flower.c     |  3 ++-
>   net/sched/cls_matchall.c   |  2 +-
>   5 files changed, 30 insertions(+), 4 deletions(-)
> 
> diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> index e343f9f8363e..1c88ca113544 100644
> --- a/include/net/flow_offload.h
> +++ b/include/net/flow_offload.h
> @@ -213,6 +213,8 @@ struct flow_action_cookie {
>   	u8 cookie[];
>   };
>   
> +#define FLOW_OFFLOAD_MAX_ACT_STATS 32
> +
>   struct flow_action_cookie *flow_action_cookie_create(void *data,
>   						     unsigned int len,
>   						     gfp_t gfp);
> @@ -221,6 +223,7 @@ struct flow_action_cookie *flow_action_cookie_create(void *data,
>   struct flow_action_entry {
>   	enum flow_action_id		id;
>   	u32				hw_index;
> +	unsigned long			act_cookie;
>   	enum flow_action_hw_stats	hw_stats;
>   	action_destr			destructor;
>   	void				*destructor_priv;
> @@ -442,6 +445,11 @@ struct flow_stats {
>   	bool used_hw_stats_valid;
>   };
>   
> +struct flow_act_stat {
> +	unsigned long		act_cookie;
> +	struct flow_stats	stats;
> +};
> +
>   static inline void flow_stats_update(struct flow_stats *flow_stats,
>   				     u64 bytes, u64 pkts,
>   				     u64 drops, u64 lastused,
> @@ -588,6 +596,8 @@ struct flow_cls_offload {
>   	unsigned long cookie;
>   	struct flow_rule *rule;
>   	struct flow_stats stats;
> +	struct flow_act_stat act_stats[FLOW_OFFLOAD_MAX_ACT_STATS];

Apparently this array can exceed the stack frame size for several archs 
(reported by the kernel test bot).
I will change this array to be dynamically allocated.

> +	bool use_act_stats;
>   	u32 classid;
>   };
>   
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
> index d5b8fa01da87..642e6f07cbf0 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -282,13 +282,27 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
>   
>   static inline void
>   tcf_exts_hw_stats_update(const struct tcf_exts *exts,
> -			 struct flow_stats *stats)
> +			 struct flow_stats *flow_stats,
> +			 struct flow_act_stat *act_stats,
> +			 bool use_act_stats)
>   {
>   #ifdef CONFIG_NET_CLS_ACT
> +	int nr_actions = exts->nr_actions;
>   	int i;
>   
> -	for (i = 0; i < exts->nr_actions; i++) {
> +	if (use_act_stats)
> +		nr_actions = FLOW_OFFLOAD_MAX_ACT_STATS;
> +
> +	for (i = 0; i < nr_actions; i++) {
>   		struct tc_action *a = exts->actions[i];
> +		struct flow_stats *stats = flow_stats;
> +
> +		if (use_act_stats) {
> +			a = (struct tc_action *)act_stats[i].act_cookie;
> +			if (!a)
> +				break;
> +			stats = &act_stats[i].stats;
> +		}
>   
>   		/* if stats from hw, just skip */
>   		if (tcf_action_update_hw_stats(a)) {
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index 50566db45949..c5a6a0d7f7a1 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -3553,6 +3553,7 @@ int tc_setup_action(struct flow_action *flow_action,
>   		for (k = 0; k < index ; k++) {
>   			entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
>   			entry[k].hw_index = act->tcfa_index;
> +			entry[k].act_cookie = (unsigned long)act;
>   		}
>   
>   		j += index;
> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
> index 82b3e8ff656c..ff004f13d0c9 100644
> --- a/net/sched/cls_flower.c
> +++ b/net/sched/cls_flower.c
> @@ -500,7 +500,8 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
>   	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
>   			 rtnl_held);
>   
> -	tcf_exts_hw_stats_update(&f->exts, &cls_flower.stats);
> +	tcf_exts_hw_stats_update(&f->exts, &cls_flower.stats,
> +				 cls_flower.act_stats, cls_flower.use_act_stats);
>   }
>   
>   static void __fl_put(struct cls_fl_filter *f)
> diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
> index 225e87740ec5..3d441063113d 100644
> --- a/net/sched/cls_matchall.c
> +++ b/net/sched/cls_matchall.c
> @@ -329,7 +329,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp,
>   
>   	tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
>   
> -	tcf_exts_hw_stats_update(&head->exts, &cls_mall.stats);
> +	tcf_exts_hw_stats_update(&head->exts, &cls_mall.stats, NULL, false);
>   }
>   
>   static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
Edward Cree Oct. 6, 2022, 9:53 a.m. UTC | #2
On 04/10/2022 09:22, Oz Shlomo wrote:
>> @@ -588,6 +596,8 @@ struct flow_cls_offload {
>>       unsigned long cookie;
>>       struct flow_rule *rule;
>>       struct flow_stats stats;
>> +    struct flow_act_stat act_stats[FLOW_OFFLOAD_MAX_ACT_STATS];
> 
> Apparently this array can exceed the stack frame size for several archs (reported by the kernel test bot).

Seems to me like yet another sign this array shouldn't exist in the
 first place and you should be calling a driver offload function per
 action and not trying to squeeze all this through the existing
 flow-rule-centric API.
Why can't the action stats be stored in struct flow_action_entry,
 inside struct flow_rule?  Then, if you really want a single call
 for performance reasons, action-stats-aware drivers could just walk
 through flow_action_for_each(..., &flow_rule->action) and update
 each action's stats based on action->act_cookie.  That should still
 allow proper shared action handling, and seems far more elegant
 than this array.  (Any time you have two arrays and there's a close
 connection between a[i] and b[i], that's a good sign one's elements
 should be a struct member of the other's, rather than them being
 tied together only by index.)

-ed
diff mbox series

Patch

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index e343f9f8363e..1c88ca113544 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -213,6 +213,8 @@  struct flow_action_cookie {
 	u8 cookie[];
 };
 
+#define FLOW_OFFLOAD_MAX_ACT_STATS 32
+
 struct flow_action_cookie *flow_action_cookie_create(void *data,
 						     unsigned int len,
 						     gfp_t gfp);
@@ -221,6 +223,7 @@  struct flow_action_cookie *flow_action_cookie_create(void *data,
 struct flow_action_entry {
 	enum flow_action_id		id;
 	u32				hw_index;
+	unsigned long			act_cookie;
 	enum flow_action_hw_stats	hw_stats;
 	action_destr			destructor;
 	void				*destructor_priv;
@@ -442,6 +445,11 @@  struct flow_stats {
 	bool used_hw_stats_valid;
 };
 
+struct flow_act_stat {
+	unsigned long		act_cookie;
+	struct flow_stats	stats;
+};
+
 static inline void flow_stats_update(struct flow_stats *flow_stats,
 				     u64 bytes, u64 pkts,
 				     u64 drops, u64 lastused,
@@ -588,6 +596,8 @@  struct flow_cls_offload {
 	unsigned long cookie;
 	struct flow_rule *rule;
 	struct flow_stats stats;
+	struct flow_act_stat act_stats[FLOW_OFFLOAD_MAX_ACT_STATS];
+	bool use_act_stats;
 	u32 classid;
 };
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index d5b8fa01da87..642e6f07cbf0 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -282,13 +282,27 @@  static inline void tcf_exts_put_net(struct tcf_exts *exts)
 
 static inline void
 tcf_exts_hw_stats_update(const struct tcf_exts *exts,
-			 struct flow_stats *stats)
+			 struct flow_stats *flow_stats,
+			 struct flow_act_stat *act_stats,
+			 bool use_act_stats)
 {
 #ifdef CONFIG_NET_CLS_ACT
+	int nr_actions = exts->nr_actions;
 	int i;
 
-	for (i = 0; i < exts->nr_actions; i++) {
+	if (use_act_stats)
+		nr_actions = FLOW_OFFLOAD_MAX_ACT_STATS;
+
+	for (i = 0; i < nr_actions; i++) {
 		struct tc_action *a = exts->actions[i];
+		struct flow_stats *stats = flow_stats;
+
+		if (use_act_stats) {
+			a = (struct tc_action *)act_stats[i].act_cookie;
+			if (!a)
+				break;
+			stats = &act_stats[i].stats;
+		}
 
 		/* if stats from hw, just skip */
 		if (tcf_action_update_hw_stats(a)) {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 50566db45949..c5a6a0d7f7a1 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3553,6 +3553,7 @@  int tc_setup_action(struct flow_action *flow_action,
 		for (k = 0; k < index ; k++) {
 			entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
 			entry[k].hw_index = act->tcfa_index;
+			entry[k].act_cookie = (unsigned long)act;
 		}
 
 		j += index;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 82b3e8ff656c..ff004f13d0c9 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -500,7 +500,8 @@  static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
 	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
 			 rtnl_held);
 
-	tcf_exts_hw_stats_update(&f->exts, &cls_flower.stats);
+	tcf_exts_hw_stats_update(&f->exts, &cls_flower.stats,
+				 cls_flower.act_stats, cls_flower.use_act_stats);
 }
 
 static void __fl_put(struct cls_fl_filter *f)
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 225e87740ec5..3d441063113d 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -329,7 +329,7 @@  static void mall_stats_hw_filter(struct tcf_proto *tp,
 
 	tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
 
-	tcf_exts_hw_stats_update(&head->exts, &cls_mall.stats);
+	tcf_exts_hw_stats_update(&head->exts, &cls_mall.stats, NULL, false);
 }
 
 static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,