diff mbox series

[net-next,v4,2/2] net/sched: cls_flower: add support for matching tunnel control flags

Message ID bc5449dc17cfebe90849c9daba8a078065f5ddf8.1717088241.git.dcaratti@redhat.com (mailing list archive)
State Accepted
Commit 1d17568e74dedbcb54d36af0662a15128297d681
Delegated to: Netdev Maintainers
Headers show
Series net: allow dissecting/matching tunnel control flags | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4817 this patch: 4817
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 1000 this patch: 1000
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5086 this patch: 5086
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-06-03--15-00 (tests: 1041)

Commit Message

Davide Caratti May 30, 2024, 5:08 p.m. UTC
extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata.

Suggested-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
 include/uapi/linux/pkt_cls.h |  3 ++
 net/sched/cls_flower.c       | 56 +++++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

Comments

Simon Horman June 1, 2024, 1:33 p.m. UTC | #1
On Thu, May 30, 2024 at 07:08:35PM +0200, Davide Caratti wrote:
> extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata.
> 
> Suggested-by: Ilya Maximets <i.maximets@ovn.org>
> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Davide Caratti <dcaratti@redhat.com>

Reviewed-by: Simon Horman <horms@kernel.org>
Asbjørn Sloth Tønnesen June 5, 2024, 7:37 a.m. UTC | #2
Hi Davide and Ilva,

On 5/30/24 5:08 PM, Davide Caratti wrote:
> extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata.
> 
> Suggested-by: Ilya Maximets <i.maximets@ovn.org>
> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Davide Caratti <dcaratti@redhat.com>
> ---
>   include/uapi/linux/pkt_cls.h |  3 ++
>   net/sched/cls_flower.c       | 56 +++++++++++++++++++++++++++++++++++-
>   2 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
> index 229fc925ec3a..b6d38f5fd7c0 100644
> --- a/include/uapi/linux/pkt_cls.h
> +++ b/include/uapi/linux/pkt_cls.h
> @@ -554,6 +554,9 @@ enum {
>   	TCA_FLOWER_KEY_SPI,		/* be32 */
>   	TCA_FLOWER_KEY_SPI_MASK,	/* be32 */
>   
> +	TCA_FLOWER_KEY_ENC_FLAGS,	/* u32 */
> +	TCA_FLOWER_KEY_ENC_FLAGS_MASK,	/* u32 */
> +
>   	__TCA_FLOWER_MAX,
>   };
>   
> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
> index fd9a6f20b60b..eef570c577ac 100644
> --- a/net/sched/cls_flower.c
> +++ b/net/sched/cls_flower.c
> @@ -41,6 +41,12 @@
>   #define TCA_FLOWER_KEY_CT_FLAGS_MASK \
>   		(TCA_FLOWER_KEY_CT_FLAGS_MAX - 1)
>   
> +#define TUNNEL_FLAGS_PRESENT (\
> +	_BITUL(IP_TUNNEL_CSUM_BIT) |		\
> +	_BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) |	\
> +	_BITUL(IP_TUNNEL_OAM_BIT) |		\
> +	_BITUL(IP_TUNNEL_CRIT_OPT_BIT))
> +
>   struct fl_flow_key {
>   	struct flow_dissector_key_meta meta;
>   	struct flow_dissector_key_control control;
> @@ -75,6 +81,7 @@ struct fl_flow_key {
>   	struct flow_dissector_key_l2tpv3 l2tpv3;
>   	struct flow_dissector_key_ipsec ipsec;
>   	struct flow_dissector_key_cfm cfm;
> +	struct flow_dissector_key_enc_flags enc_flags;
>   } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
>   
>   struct fl_flow_mask_range {
> @@ -732,6 +739,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
>   	[TCA_FLOWER_KEY_SPI_MASK]	= { .type = NLA_U32 },
>   	[TCA_FLOWER_L2_MISS]		= NLA_POLICY_MAX(NLA_U8, 1),
>   	[TCA_FLOWER_KEY_CFM]		= { .type = NLA_NESTED },
> +	[TCA_FLOWER_KEY_ENC_FLAGS]	= NLA_POLICY_MASK(NLA_U32,
> +							  TUNNEL_FLAGS_PRESENT),
> +`	[TCA_FLOWER_KEY_ENC_FLAGS_MASK]	= NLA_POLICY_MASK(NLA_U32,
> +							  TUNNEL_FLAGS_PRESENT),
>   };
>   
>   static const struct nla_policy
> @@ -1825,6 +1836,21 @@ static int fl_set_key_cfm(struct nlattr **tb,
>   	return 0;
>   }
>   
> +static int fl_set_key_enc_flags(struct nlattr **tb, u32 *flags_key,
> +				u32 *flags_mask, struct netlink_ext_ack *extack)
> +{
> +	/* mask is mandatory for flags */
> +	if (NL_REQ_ATTR_CHECK(extack, NULL, tb, TCA_FLOWER_KEY_ENC_FLAGS_MASK)) {
> +		NL_SET_ERR_MSG(extack, "missing enc_flags mask");
> +		return -EINVAL;
> +	}
> +
> +	*flags_key = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS]);
> +	*flags_mask = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS_MASK]);
> +
> +	return 0;
> +}
> +
>   static int fl_set_key(struct net *net, struct nlattr **tb,
>   		      struct fl_flow_key *key, struct fl_flow_key *mask,
>   		      struct netlink_ext_ack *extack)
> @@ -2059,9 +2085,16 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
>   	if (ret)
>   		return ret;
>   
> -	if (tb[TCA_FLOWER_KEY_FLAGS])
> +	if (tb[TCA_FLOWER_KEY_FLAGS]) {
>   		ret = fl_set_key_flags(tb, &key->control.flags,
>   				       &mask->control.flags, extack);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	if (tb[TCA_FLOWER_KEY_ENC_FLAGS])
> +		ret = fl_set_key_enc_flags(tb, &key->enc_flags.flags,
> +					   &mask->enc_flags.flags, extack);
>   
>   	return ret;

Sorry, that I am late to the party, I have been away camping at
Electromagnetic Field in the UK.

Why not use the already existing key->enc_control.flags with dissector
key FLOW_DISSECTOR_KEY_ENC_CONTROL for storing the flags?

Currently key->enc_control.flags are unused.

I haven't fixed the drivers to validate that field yet, so currently
only sfc does so.

Look at include/uapi/linux/pkt_cls.h for netlink flags:

enum {
         TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
         TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
};

and at include/net/flow_dissector.h for the dissector flags:

#define FLOW_DIS_IS_FRAGMENT    BIT(0)
#define FLOW_DIS_FIRST_FRAG     BIT(1)
#define FLOW_DIS_ENCAPSULATION  BIT(2)

I would like to keep FLOW_DIS_ENCAPSULATION as the last flag, in order to
keep parity with the netlink flags, since the dissector flags field is
exposed to user space when some flags are not supported.

I realize that since this series is now merged, then fixing this up will
have to go in another series, are you up for that?
Davide Caratti June 6, 2024, 8:09 a.m. UTC | #3
hello Asbjørn, thanks for looking at this!

On Wed, Jun 05, 2024 at 07:37:13AM +0000, Asbjørn Sloth Tønnesen wrote:
> Hi Davide and Ilva,

[...] 
 
> Sorry, that I am late to the party, I have been away camping at
> Electromagnetic Field in the UK.

guess you had fun :) 

> Why not use the already existing key->enc_control.flags with dissector
> key FLOW_DISSECTOR_KEY_ENC_CONTROL for storing the flags?
> 
> Currently key->enc_control.flags are unused.

I looked at the struct, it currently stores information about
IPv4 / IPv6 fragmentation of the inner/outer packet plus
FLOW_DIS_ENCAPSULATION - that IIRC is true in case the packet has
an inner IP header (key->enc_control.addr_type selects the address family).
these 3 bits are usually set when FLOW_DISSECTOR_KEY_CONTROL is
requested.

> I haven't fixed the drivers to validate that field yet, so currently
> only sfc does so.

I see:

[1] https://elixir.bootlin.com/linux/v6.10-rc2/source/drivers/net/ethernet/sfc/tc.c#L276

(for FLOW_DISSECTOR_KEY_CONTROL)

[2] https://elixir.bootlin.com/linux/v6.10-rc2/source/drivers/net/ethernet/sfc/tc.c#L390

(for FLOW_DISSECTOR_KEY_ENC_CONTROL)

> Look at include/uapi/linux/pkt_cls.h for netlink flags:
> 
> enum {
>         TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
>         TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
> };
> 
> and at include/net/flow_dissector.h for the dissector flags:
> 
> #define FLOW_DIS_IS_FRAGMENT    BIT(0)
> #define FLOW_DIS_FIRST_FRAG     BIT(1)
> #define FLOW_DIS_ENCAPSULATION  BIT(2)
> 
> I would like to keep FLOW_DIS_ENCAPSULATION as the last flag, in order to
> keep parity with the netlink flags, since the dissector flags field is
> exposed to user space when some flags are not supported.

to fully understand this, I need some more info on how you think
IP_TUNNEL_<blah> bit should be written on the dissected data, since
FLOW_DIS_IS_FRAGMENT has the same value as BIT(IP_TUNNEL_CSUM_BIT).
So, in a way or another parity with netlink can't be guaranteed.

The main reason why I added a new dissector bit is: avoiding breaking
functionality of drivers, because I don't really know what the hardware
does. If we want to use FLOW_DISSECTOR_KEY_ENC_CONTROL, then all drivers
supporting the offload of this bit need to return -EOPNOTSUPP if
key->enc_control.flags contains IP_TUNNEL_<blah> bits (like done in [2] for
FLOW_DIS_*FRAG*).
Moreover, if a driver supports offload of one of these bits (e.g. TUNNEL_OAM),
we need to understand if it's possible for hardware to match when 'addr_type'
is not specified, e.g. a geneve packet carrying a non-IP packet.

> I realize that since this series is now merged, then fixing this up will
> have to go in another series, are you up for that?

I'm not against: if there's something to improve, this is the right
moment _ later it will be more difficult; but we should clarify:

- how / if we want to handle overlap of 'flags'. I guess, pass the
  current tunnel flags to the arguments of
  skb_flow_dissect_set_enc_addr_type(), and write "something" in
  ctrl->flags.
- what to do with with flower netlink API. If these bits are in the
  same struct, we don't need TCA_FLOWER_KEY_ENC_FLAGS anymore? should
  we extend TCA_FLOWER_KEY_FLAGS instead?

thanks,
diff mbox series

Patch

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 229fc925ec3a..b6d38f5fd7c0 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -554,6 +554,9 @@  enum {
 	TCA_FLOWER_KEY_SPI,		/* be32 */
 	TCA_FLOWER_KEY_SPI_MASK,	/* be32 */
 
+	TCA_FLOWER_KEY_ENC_FLAGS,	/* u32 */
+	TCA_FLOWER_KEY_ENC_FLAGS_MASK,	/* u32 */
+
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index fd9a6f20b60b..eef570c577ac 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -41,6 +41,12 @@ 
 #define TCA_FLOWER_KEY_CT_FLAGS_MASK \
 		(TCA_FLOWER_KEY_CT_FLAGS_MAX - 1)
 
+#define TUNNEL_FLAGS_PRESENT (\
+	_BITUL(IP_TUNNEL_CSUM_BIT) |		\
+	_BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) |	\
+	_BITUL(IP_TUNNEL_OAM_BIT) |		\
+	_BITUL(IP_TUNNEL_CRIT_OPT_BIT))
+
 struct fl_flow_key {
 	struct flow_dissector_key_meta meta;
 	struct flow_dissector_key_control control;
@@ -75,6 +81,7 @@  struct fl_flow_key {
 	struct flow_dissector_key_l2tpv3 l2tpv3;
 	struct flow_dissector_key_ipsec ipsec;
 	struct flow_dissector_key_cfm cfm;
+	struct flow_dissector_key_enc_flags enc_flags;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -732,6 +739,10 @@  static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_SPI_MASK]	= { .type = NLA_U32 },
 	[TCA_FLOWER_L2_MISS]		= NLA_POLICY_MAX(NLA_U8, 1),
 	[TCA_FLOWER_KEY_CFM]		= { .type = NLA_NESTED },
+	[TCA_FLOWER_KEY_ENC_FLAGS]	= NLA_POLICY_MASK(NLA_U32,
+							  TUNNEL_FLAGS_PRESENT),
+	[TCA_FLOWER_KEY_ENC_FLAGS_MASK]	= NLA_POLICY_MASK(NLA_U32,
+							  TUNNEL_FLAGS_PRESENT),
 };
 
 static const struct nla_policy
@@ -1825,6 +1836,21 @@  static int fl_set_key_cfm(struct nlattr **tb,
 	return 0;
 }
 
+static int fl_set_key_enc_flags(struct nlattr **tb, u32 *flags_key,
+				u32 *flags_mask, struct netlink_ext_ack *extack)
+{
+	/* mask is mandatory for flags */
+	if (NL_REQ_ATTR_CHECK(extack, NULL, tb, TCA_FLOWER_KEY_ENC_FLAGS_MASK)) {
+		NL_SET_ERR_MSG(extack, "missing enc_flags mask");
+		return -EINVAL;
+	}
+
+	*flags_key = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS]);
+	*flags_mask = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS_MASK]);
+
+	return 0;
+}
+
 static int fl_set_key(struct net *net, struct nlattr **tb,
 		      struct fl_flow_key *key, struct fl_flow_key *mask,
 		      struct netlink_ext_ack *extack)
@@ -2059,9 +2085,16 @@  static int fl_set_key(struct net *net, struct nlattr **tb,
 	if (ret)
 		return ret;
 
-	if (tb[TCA_FLOWER_KEY_FLAGS])
+	if (tb[TCA_FLOWER_KEY_FLAGS]) {
 		ret = fl_set_key_flags(tb, &key->control.flags,
 				       &mask->control.flags, extack);
+		if (ret)
+			return ret;
+	}
+
+	if (tb[TCA_FLOWER_KEY_ENC_FLAGS])
+		ret = fl_set_key_enc_flags(tb, &key->enc_flags.flags,
+					   &mask->enc_flags.flags, extack);
 
 	return ret;
 }
@@ -2175,6 +2208,8 @@  static void fl_init_dissector(struct flow_dissector *dissector,
 			     FLOW_DISSECTOR_KEY_IPSEC, ipsec);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_CFM, cfm);
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+			     FLOW_DISSECTOR_KEY_ENC_FLAGS, enc_flags);
 
 	skb_flow_dissector_init(dissector, keys, cnt);
 }
@@ -3291,6 +3326,22 @@  static int fl_dump_key_cfm(struct sk_buff *skb,
 	return err;
 }
 
+static int fl_dump_key_enc_flags(struct sk_buff *skb,
+				 struct flow_dissector_key_enc_flags *key,
+				 struct flow_dissector_key_enc_flags *mask)
+{
+	if (!memchr_inv(mask, 0, sizeof(*mask)))
+		return 0;
+
+	if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS, key->flags))
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS_MASK, mask->flags))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
 			       struct flow_dissector_key_enc_opts *enc_opts)
 {
@@ -3592,6 +3643,9 @@  static int fl_dump_key(struct sk_buff *skb, struct net *net,
 	if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm))
 		goto nla_put_failure;
 
+	if (fl_dump_key_enc_flags(skb, &key->enc_flags, &mask->enc_flags))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure: