Message ID | bc5449dc17cfebe90849c9daba8a078065f5ddf8.1717088241.git.dcaratti@redhat.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 1d17568e74dedbcb54d36af0662a15128297d681 |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: allow dissecting/matching tunnel control flags | expand |
On Thu, May 30, 2024 at 07:08:35PM +0200, Davide Caratti wrote: > extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata. > > Suggested-by: Ilya Maximets <i.maximets@ovn.org> > Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> > Signed-off-by: Davide Caratti <dcaratti@redhat.com> Reviewed-by: Simon Horman <horms@kernel.org>
Hi Davide and Ilva, On 5/30/24 5:08 PM, Davide Caratti wrote: > extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata. > > Suggested-by: Ilya Maximets <i.maximets@ovn.org> > Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> > Signed-off-by: Davide Caratti <dcaratti@redhat.com> > --- > include/uapi/linux/pkt_cls.h | 3 ++ > net/sched/cls_flower.c | 56 +++++++++++++++++++++++++++++++++++- > 2 files changed, 58 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h > index 229fc925ec3a..b6d38f5fd7c0 100644 > --- a/include/uapi/linux/pkt_cls.h > +++ b/include/uapi/linux/pkt_cls.h > @@ -554,6 +554,9 @@ enum { > TCA_FLOWER_KEY_SPI, /* be32 */ > TCA_FLOWER_KEY_SPI_MASK, /* be32 */ > > + TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ > + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ > + > __TCA_FLOWER_MAX, > }; > > diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c > index fd9a6f20b60b..eef570c577ac 100644 > --- a/net/sched/cls_flower.c > +++ b/net/sched/cls_flower.c > @@ -41,6 +41,12 @@ > #define TCA_FLOWER_KEY_CT_FLAGS_MASK \ > (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) > > +#define TUNNEL_FLAGS_PRESENT (\ > + _BITUL(IP_TUNNEL_CSUM_BIT) | \ > + _BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) | \ > + _BITUL(IP_TUNNEL_OAM_BIT) | \ > + _BITUL(IP_TUNNEL_CRIT_OPT_BIT)) > + > struct fl_flow_key { > struct flow_dissector_key_meta meta; > struct flow_dissector_key_control control; > @@ -75,6 +81,7 @@ struct fl_flow_key { > struct flow_dissector_key_l2tpv3 l2tpv3; > struct flow_dissector_key_ipsec ipsec; > struct flow_dissector_key_cfm cfm; > + struct flow_dissector_key_enc_flags enc_flags; > } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ > > struct fl_flow_mask_range { > @@ -732,6 +739,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { > [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 }, > [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), > [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, > + [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_U32, > + TUNNEL_FLAGS_PRESENT), > +` [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_U32, > + TUNNEL_FLAGS_PRESENT), > }; > > static const struct nla_policy > @@ -1825,6 +1836,21 @@ static int fl_set_key_cfm(struct nlattr **tb, > return 0; > } > > +static int fl_set_key_enc_flags(struct nlattr **tb, u32 *flags_key, > + u32 *flags_mask, struct netlink_ext_ack *extack) > +{ > + /* mask is mandatory for flags */ > + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, TCA_FLOWER_KEY_ENC_FLAGS_MASK)) { > + NL_SET_ERR_MSG(extack, "missing enc_flags mask"); > + return -EINVAL; > + } > + > + *flags_key = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS]); > + *flags_mask = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS_MASK]); > + > + return 0; > +} > + > static int fl_set_key(struct net *net, struct nlattr **tb, > struct fl_flow_key *key, struct fl_flow_key *mask, > struct netlink_ext_ack *extack) > @@ -2059,9 +2085,16 @@ static int fl_set_key(struct net *net, struct nlattr **tb, > if (ret) > return ret; > > - if (tb[TCA_FLOWER_KEY_FLAGS]) > + if (tb[TCA_FLOWER_KEY_FLAGS]) { > ret = fl_set_key_flags(tb, &key->control.flags, > &mask->control.flags, extack); > + if (ret) > + return ret; > + } > + > + if (tb[TCA_FLOWER_KEY_ENC_FLAGS]) > + ret = fl_set_key_enc_flags(tb, &key->enc_flags.flags, > + &mask->enc_flags.flags, extack); > > return ret; Sorry, that I am late to the party, I have been away camping at Electromagnetic Field in the UK. Why not use the already existing key->enc_control.flags with dissector key FLOW_DISSECTOR_KEY_ENC_CONTROL for storing the flags? Currently key->enc_control.flags are unused. I haven't fixed the drivers to validate that field yet, so currently only sfc does so. Look at include/uapi/linux/pkt_cls.h for netlink flags: enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), }; and at include/net/flow_dissector.h for the dissector flags: #define FLOW_DIS_IS_FRAGMENT BIT(0) #define FLOW_DIS_FIRST_FRAG BIT(1) #define FLOW_DIS_ENCAPSULATION BIT(2) I would like to keep FLOW_DIS_ENCAPSULATION as the last flag, in order to keep parity with the netlink flags, since the dissector flags field is exposed to user space when some flags are not supported. I realize that since this series is now merged, then fixing this up will have to go in another series, are you up for that?
hello Asbjørn, thanks for looking at this! On Wed, Jun 05, 2024 at 07:37:13AM +0000, Asbjørn Sloth Tønnesen wrote: > Hi Davide and Ilva, [...] > Sorry, that I am late to the party, I have been away camping at > Electromagnetic Field in the UK. guess you had fun :) > Why not use the already existing key->enc_control.flags with dissector > key FLOW_DISSECTOR_KEY_ENC_CONTROL for storing the flags? > > Currently key->enc_control.flags are unused. I looked at the struct, it currently stores information about IPv4 / IPv6 fragmentation of the inner/outer packet plus FLOW_DIS_ENCAPSULATION - that IIRC is true in case the packet has an inner IP header (key->enc_control.addr_type selects the address family). these 3 bits are usually set when FLOW_DISSECTOR_KEY_CONTROL is requested. > I haven't fixed the drivers to validate that field yet, so currently > only sfc does so. I see: [1] https://elixir.bootlin.com/linux/v6.10-rc2/source/drivers/net/ethernet/sfc/tc.c#L276 (for FLOW_DISSECTOR_KEY_CONTROL) [2] https://elixir.bootlin.com/linux/v6.10-rc2/source/drivers/net/ethernet/sfc/tc.c#L390 (for FLOW_DISSECTOR_KEY_ENC_CONTROL) > Look at include/uapi/linux/pkt_cls.h for netlink flags: > > enum { > TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), > TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), > }; > > and at include/net/flow_dissector.h for the dissector flags: > > #define FLOW_DIS_IS_FRAGMENT BIT(0) > #define FLOW_DIS_FIRST_FRAG BIT(1) > #define FLOW_DIS_ENCAPSULATION BIT(2) > > I would like to keep FLOW_DIS_ENCAPSULATION as the last flag, in order to > keep parity with the netlink flags, since the dissector flags field is > exposed to user space when some flags are not supported. to fully understand this, I need some more info on how you think IP_TUNNEL_<blah> bit should be written on the dissected data, since FLOW_DIS_IS_FRAGMENT has the same value as BIT(IP_TUNNEL_CSUM_BIT). So, in a way or another parity with netlink can't be guaranteed. The main reason why I added a new dissector bit is: avoiding breaking functionality of drivers, because I don't really know what the hardware does. If we want to use FLOW_DISSECTOR_KEY_ENC_CONTROL, then all drivers supporting the offload of this bit need to return -EOPNOTSUPP if key->enc_control.flags contains IP_TUNNEL_<blah> bits (like done in [2] for FLOW_DIS_*FRAG*). Moreover, if a driver supports offload of one of these bits (e.g. TUNNEL_OAM), we need to understand if it's possible for hardware to match when 'addr_type' is not specified, e.g. a geneve packet carrying a non-IP packet. > I realize that since this series is now merged, then fixing this up will > have to go in another series, are you up for that? I'm not against: if there's something to improve, this is the right moment _ later it will be more difficult; but we should clarify: - how / if we want to handle overlap of 'flags'. I guess, pass the current tunnel flags to the arguments of skb_flow_dissect_set_enc_addr_type(), and write "something" in ctrl->flags. - what to do with with flower netlink API. If these bits are in the same struct, we don't need TCA_FLOWER_KEY_ENC_FLAGS anymore? should we extend TCA_FLOWER_KEY_FLAGS instead? thanks,
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 229fc925ec3a..b6d38f5fd7c0 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,6 +554,9 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fd9a6f20b60b..eef570c577ac 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -41,6 +41,12 @@ #define TCA_FLOWER_KEY_CT_FLAGS_MASK \ (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) +#define TUNNEL_FLAGS_PRESENT (\ + _BITUL(IP_TUNNEL_CSUM_BIT) | \ + _BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) | \ + _BITUL(IP_TUNNEL_OAM_BIT) | \ + _BITUL(IP_TUNNEL_CRIT_OPT_BIT)) + struct fl_flow_key { struct flow_dissector_key_meta meta; struct flow_dissector_key_control control; @@ -75,6 +81,7 @@ struct fl_flow_key { struct flow_dissector_key_l2tpv3 l2tpv3; struct flow_dissector_key_ipsec ipsec; struct flow_dissector_key_cfm cfm; + struct flow_dissector_key_enc_flags enc_flags; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -732,6 +739,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 }, [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_U32, + TUNNEL_FLAGS_PRESENT), + [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_U32, + TUNNEL_FLAGS_PRESENT), }; static const struct nla_policy @@ -1825,6 +1836,21 @@ static int fl_set_key_cfm(struct nlattr **tb, return 0; } +static int fl_set_key_enc_flags(struct nlattr **tb, u32 *flags_key, + u32 *flags_mask, struct netlink_ext_ack *extack) +{ + /* mask is mandatory for flags */ + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, TCA_FLOWER_KEY_ENC_FLAGS_MASK)) { + NL_SET_ERR_MSG(extack, "missing enc_flags mask"); + return -EINVAL; + } + + *flags_key = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS]); + *flags_mask = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS_MASK]); + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -2059,9 +2085,16 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (ret) return ret; - if (tb[TCA_FLOWER_KEY_FLAGS]) + if (tb[TCA_FLOWER_KEY_FLAGS]) { ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags, extack); + if (ret) + return ret; + } + + if (tb[TCA_FLOWER_KEY_ENC_FLAGS]) + ret = fl_set_key_enc_flags(tb, &key->enc_flags.flags, + &mask->enc_flags.flags, extack); return ret; } @@ -2175,6 +2208,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_IPSEC, ipsec); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_CFM, cfm); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_FLAGS, enc_flags); skb_flow_dissector_init(dissector, keys, cnt); } @@ -3291,6 +3326,22 @@ static int fl_dump_key_cfm(struct sk_buff *skb, return err; } +static int fl_dump_key_enc_flags(struct sk_buff *skb, + struct flow_dissector_key_enc_flags *key, + struct flow_dissector_key_enc_flags *mask) +{ + if (!memchr_inv(mask, 0, sizeof(*mask))) + return 0; + + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS, key->flags)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS_MASK, mask->flags)) + return -EMSGSIZE; + + return 0; +} + static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, struct flow_dissector_key_enc_opts *enc_opts) { @@ -3592,6 +3643,9 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm)) goto nla_put_failure; + if (fl_dump_key_enc_flags(skb, &key->enc_flags, &mask->enc_flags)) + goto nla_put_failure; + return 0; nla_put_failure: