Message ID | 20210928114451.24956-1-xiaoliang.yang_1@nxp.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [RFC,net-next] net: qos: introduce a frer action to implement 802.1CB | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for net-next |
netdev/subject_prefix | success | Link |
netdev/cc_maintainers | warning | 3 maintainers not CCed: kuba@kernel.org jiri@resnulli.us xiyou.wangcong@gmail.com |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | fail | Errors and warnings before: 6367 this patch: 6194 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | warning | CHECK: Please use a blank line after function/struct/union/enum declarations WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 82 exceeds 80 columns |
netdev/build_allmodconfig_warn | fail | Errors and warnings before: 6459 this patch: 6286 |
netdev/header_inline | success | Link |
Hi, Xiaoliang Yang <xiaoliang.yang_1@nxp.com> writes: > This patch introduce a frer action to implement frame replication and > elimination for reliability, which is defined in IEEE P802.1CB. > An action seems, to me, a bit too limiting/fine grained for a frame replication and elimination feature. At least I want to hear the reasons that the current hsr/prp support cannot be extended to support one more tag format/protocol. And the current name for the spec is IEEE 802.1CB-2017. > There are two modes for frer action: generate and push the tag, recover > and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This > patch only supports RTAG now. > > User can push the tag on egress port of the talker device, recover and > pop the tag on ingress port of the listener device. When it's a relay > system, push the tag on ingress port, or set individual recover on > ingress port. Set the sequence recover on egress port. > > Use action "mirred" to do split function, and use "vlan-modify" to do > active stream identification function on relay system. > > Below is the setting example in user space: > push rtag on relay system: > > tc qdisc add dev swp0 clsact > > tc filter add dev swp0 ingress protocol 802.1Q flower \ > skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \ > action frer rtag tag-action tag-push > > split stream: > > tc filter add dev swp0 ingress protocol 802.1Q flower \ > skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \ > action mirred egress mirror dev swp1 > > individual recover: > > tc filter add dev swp0 ingress protocol 802.1Q flower > skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \ > action frer rtag recover \ > alg vector history-length 32 reset-time 10000 > > recover and pop rtag: > > tc filter add dev swp0 egress protocol 802.1Q flower > skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \ > action frer rtag recover \ > alg vector history-length 32 reset-time 10000 \ > tag-action tag-pop > > Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com> > --- > include/net/flow_offload.h | 9 + > include/net/tc_act/tc_frer.h | 52 +++ > include/uapi/linux/if_ether.h | 1 + > include/uapi/linux/pkt_cls.h | 1 + > include/uapi/linux/tc_act/tc_frer.h | 50 ++ > net/sched/Kconfig | 13 + > net/sched/Makefile | 1 + > net/sched/act_frer.c | 695 ++++++++++++++++++++++++++++ > net/sched/cls_api.c | 11 + > 9 files changed, 833 insertions(+) > create mode 100644 include/net/tc_act/tc_frer.h > create mode 100644 include/uapi/linux/tc_act/tc_frer.h > create mode 100644 net/sched/act_frer.c > > diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h > index 3961461d9c8b..cfa9b69cec69 100644 > --- a/include/net/flow_offload.h > +++ b/include/net/flow_offload.h > @@ -148,6 +148,7 @@ enum flow_action_id { > FLOW_ACTION_MPLS_MANGLE, > FLOW_ACTION_GATE, > FLOW_ACTION_PPPOE_PUSH, > + FLOW_ACTION_FRER, > NUM_FLOW_ACTIONS, > }; > > @@ -278,6 +279,14 @@ struct flow_action_entry { > struct { /* FLOW_ACTION_PPPOE_PUSH */ > u16 sid; > } pppoe; > + struct { > + u8 tag_type; > + u8 tag_action; > + u8 recover; > + u8 rcvy_alg; > + u8 rcvy_history_len; > + u8 rcvy_reset_msec; > + } frer; > }; > struct flow_action_cookie *cookie; /* user defined action cookie */ > }; > diff --git a/include/net/tc_act/tc_frer.h b/include/net/tc_act/tc_frer.h > new file mode 100644 > index 000000000000..b2ad2b2a3fe1 > --- /dev/null > +++ b/include/net/tc_act/tc_frer.h > @@ -0,0 +1,52 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > +/* Copyright 2021 NXP */ > + > +#ifndef __NET_TC_FRER_H > +#define __NET_TC_FRER_H > + > +#include <net/act_api.h> > +#include <linux/tc_act/tc_frer.h> > + > +struct tcf_frer; > + > +struct tcf_frer_proto_ops { > + int (*encode)(struct sk_buff *skb, struct tcf_frer *frer_act); > + int (*decode)(struct sk_buff *skb); > + void (*tag_pop)(struct sk_buff *skb, struct tcf_frer *frer_act); > +}; > + > +struct tcf_frer { > + struct tc_action common; > + u8 tag_type; > + u8 tag_action; > + u8 recover; > + u8 rcvy_alg; > + u8 rcvy_history_len; > + u64 rcvy_reset_msec; > + u32 gen_seq_num; > + u32 rcvy_seq_num; > + u64 seq_space; > + u32 seq_history; > + bool take_any; > + bool rcvy_take_noseq; > + u32 cps_seq_rcvy_lost_pkts; > + u32 cps_seq_rcvy_tagless_pkts; > + u32 cps_seq_rcvy_out_of_order_pkts; > + u32 cps_seq_rcvy_rogue_pkts; > + u32 cps_seq_rcvy_resets; > + struct hrtimer hrtimer; > + const struct tcf_frer_proto_ops *proto_ops; > +}; > + > +#define to_frer(a) ((struct tcf_frer *)a) > + > +static inline bool is_tcf_frer(const struct tc_action *a) > +{ > +#ifdef CONFIG_NET_CLS_ACT > + if (a->ops && a->ops->id == TCA_ID_FRER) > + return true; > +#endif > + return false; > +} > + > +#endif > diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h > index 5f589c7a8382..812aa75f7f23 100644 > --- a/include/uapi/linux/if_ether.h > +++ b/include/uapi/linux/if_ether.h > @@ -114,6 +114,7 @@ > #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ > #define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ > #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ > +#define ETH_P_RTAG 0xF1C1 /* Redundancy Tag(IEEE 802.1CB) */ > #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ > > #define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value > diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h > index 6836ccb9c45d..a3fc0c478a65 100644 > --- a/include/uapi/linux/pkt_cls.h > +++ b/include/uapi/linux/pkt_cls.h > @@ -136,6 +136,7 @@ enum tca_id { > TCA_ID_MPLS, > TCA_ID_CT, > TCA_ID_GATE, > + TCA_ID_FRER, > /* other actions go here */ > __TCA_ID_MAX = 255 > }; > diff --git a/include/uapi/linux/tc_act/tc_frer.h b/include/uapi/linux/tc_act/tc_frer.h > new file mode 100644 > index 000000000000..cd86274483e7 > --- /dev/null > +++ b/include/uapi/linux/tc_act/tc_frer.h > @@ -0,0 +1,50 @@ > +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ > +/* Copyright 2021 NXP */ > + > +#ifndef __LINUX_TC_FRER_H > +#define __LINUX_TC_FRER_H > + > +#include <linux/pkt_cls.h> > + > +struct tc_frer { > + tc_gen; > +}; > + > +enum { > + TCA_FRER_UNSPEC, > + TCA_FRER_TM, > + TCA_FRER_PARMS, > + TCA_FRER_PAD, > + TCA_FRER_TAG_TYPE, > + TCA_FRER_TAG_ACTION, > + TCA_FRER_RECOVER, > + TCA_FRER_RECOVER_ALG, > + TCA_FRER_RECOVER_HISTORY_LEN, > + TCA_FRER_RECOVER_RESET_TM, > + TCA_FRER_RECOVER_TAGLESS_PKTS, > + TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS, > + TCA_FRER_RECOVER_ROGUE_PKTS, > + TCA_FRER_RECOVER_LOST_PKTS, > + TCA_FRER_RECOVER_RESETS, > + __TCA_FRER_MAX, > +}; > +#define TCA_FRER_MAX (__TCA_FRER_MAX - 1) > + > +enum tc_frer_tag_action { > + TCA_FRER_TAG_NULL, > + TCA_FRER_TAG_PUSH, > + TCA_FRER_TAG_POP, > +}; > + > +enum tc_frer_tag_type { > + TCA_FRER_TAG_RTAG, > + TCA_FRER_TAG_HSR, > + TCA_FRER_TAG_PRP, > +}; > + > +enum tc_frer_rcvy_alg { > + TCA_FRER_RCVY_VECTOR_ALG, > + TCA_FRER_RCVY_MATCH_ALG, > +}; > + > +#endif > diff --git a/net/sched/Kconfig b/net/sched/Kconfig > index 1e8ab4749c6c..93e2687042c2 100644 > --- a/net/sched/Kconfig > +++ b/net/sched/Kconfig > @@ -997,6 +997,19 @@ config NET_ACT_GATE > To compile this code as a module, choose M here: the > module will be called act_gate. > > +config NET_ACT_FRER > + tristate "Frame frer tc action" > + depends on NET_CLS_ACT > + help > + Say Y here to support frame replication and elimination for > + reliability, which is defined by IEEE 802.1CB. > + This action allow to add a frer tag. It also allow to remove > + the frer tag and drop repeat frames. > + > + If unsure, say N. > + To compile this code as a module, choose M here: the > + module will be called act_frer. > + > config NET_IFE_SKBMARK > tristate "Support to encoding decoding skb mark on IFE action" > depends on NET_ACT_IFE > diff --git a/net/sched/Makefile b/net/sched/Makefile > index dd14ef413fda..69e7e94be567 100644 > --- a/net/sched/Makefile > +++ b/net/sched/Makefile > @@ -32,6 +32,7 @@ obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o > obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o > obj-$(CONFIG_NET_ACT_CT) += act_ct.o > obj-$(CONFIG_NET_ACT_GATE) += act_gate.o > +obj-$(CONFIG_NET_ACT_FRER) += act_frer.o > obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o > obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o > obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o > diff --git a/net/sched/act_frer.c b/net/sched/act_frer.c > new file mode 100644 > index 000000000000..6f8ec5782d3d > --- /dev/null > +++ b/net/sched/act_frer.c > @@ -0,0 +1,695 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* Copyright 2021 NXP */ > + > +#include <linux/module.h> > +#include <linux/types.h> > +#include <linux/kernel.h> > +#include <linux/string.h> > +#include <linux/errno.h> > +#include <linux/skbuff.h> > +#include <linux/rtnetlink.h> > +#include <linux/init.h> > +#include <linux/slab.h> > +#include <net/act_api.h> > +#include <net/netlink.h> > +#include <net/pkt_cls.h> > +#include <net/tc_act/tc_frer.h> > + > +#define FRER_SEQ_SPACE 16 > +#define FRER_RCVY_RESET_MSEC 100 > +#define FRER_RCVY_INVALID_SEQ 0x100 > +#define FRER_RCVY_PASSED 0 > +#define FRER_RCVY_DISCARDED -1 > + > +static unsigned int frer_net_id; > +static struct tc_action_ops act_frer_ops; > + > +struct r_tag { > + __be16 reserved; > + __be16 sequence_nr; > + __be16 encap_proto; > +} __packed; > + > +struct rtag_ethhdr { > + struct ethhdr ethhdr; > + struct r_tag h_rtag; > +} __packed; > + > +struct rtag_vlan_ethhdr { > + struct vlan_ethhdr vlanhdr; > + struct r_tag h_rtag; > +} __packed; > + > +static const struct nla_policy frer_policy[TCA_FRER_MAX + 1] = { > + [TCA_FRER_PARMS] = > + NLA_POLICY_EXACT_LEN(sizeof(struct tc_frer)), > + [TCA_FRER_TAG_TYPE] = { .type = NLA_U8 }, > + [TCA_FRER_TAG_ACTION] = { .type = NLA_U8 }, > + [TCA_FRER_RECOVER] = { .type = NLA_U8 }, > + [TCA_FRER_RECOVER_ALG] = { .type = NLA_U8 }, > + [TCA_FRER_RECOVER_HISTORY_LEN] = { .type = NLA_U8 }, > + [TCA_FRER_RECOVER_RESET_TM] = { .type = NLA_U64 }, > +}; > + > +static void frer_seq_recovery_reset(struct tcf_frer *frer_act); > + > +static enum hrtimer_restart frer_hrtimer_func(struct hrtimer *timer) > +{ > + struct tcf_frer *frer_act = container_of(timer, struct tcf_frer, > + hrtimer); > + ktime_t remaining_tm; > + > + frer_seq_recovery_reset(frer_act); > + > + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000); > + > + hrtimer_forward(timer, timer->base->get_time(), remaining_tm); > + > + return HRTIMER_RESTART; > +} > + > +static int frer_rtag_decode(struct sk_buff *skb) > +{ > + struct rtag_vlan_ethhdr *rtag_vlan_hdr; > + struct rtag_ethhdr *rtag_hdr; > + struct vlan_ethhdr *vlanhdr; > + struct ethhdr *ethhdr; > + struct r_tag *rtag; > + bool is_vlan; > + u16 sequence; > + u16 proto; > + > + ethhdr = (struct ethhdr *)skb_mac_header(skb); > + proto = ethhdr->h_proto; > + is_vlan = false; > + > + if (proto == htons(ETH_P_8021Q)) { > + vlanhdr = (struct vlan_ethhdr *)ethhdr; > + proto = vlanhdr->h_vlan_encapsulated_proto; > + is_vlan = true; > + } > + > + if (proto != htons(ETH_P_RTAG)) > + return FRER_RCVY_INVALID_SEQ; > + > + if (is_vlan) { > + rtag_vlan_hdr = (struct rtag_vlan_ethhdr *)ethhdr; > + rtag = &rtag_vlan_hdr->h_rtag; > + } else { > + rtag_hdr = (struct rtag_ethhdr *)ethhdr; > + rtag = &rtag_hdr->h_rtag; > + } > + > + sequence = ntohs(rtag->sequence_nr); > + > + return sequence; > +} > + > +static int frer_seq_generation_alg(struct tcf_frer *frer_act) > +{ > + u32 gen_seq_max = frer_act->seq_space - 1; > + u32 gen_seq_num = frer_act->gen_seq_num; > + int sequence_number; > + > + sequence_number = gen_seq_num; > + > + if (gen_seq_num >= gen_seq_max) > + gen_seq_num = 0; > + else > + gen_seq_num++; > + > + frer_act->gen_seq_num = gen_seq_num; > + > + return sequence_number; > +} > + > +static int frer_rtag_encode(struct sk_buff *skb, struct tcf_frer *frer_act) > +{ > + struct vlan_ethhdr *vlanhdr; > + struct ethhdr *ethhdr; > + struct r_tag *rtag; > + int rtag_len, head_len; > + unsigned char *dst, *src, *p; > + __be16 *proto, proto_val; > + > + ethhdr = (struct ethhdr *)skb_mac_header(skb); > + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { > + vlanhdr = (struct vlan_ethhdr *)ethhdr; > + p = (unsigned char *)(vlanhdr + 1); > + proto = &vlanhdr->h_vlan_encapsulated_proto; > + } else { > + p = (unsigned char *)(ethhdr + 1); > + proto = ðhdr->h_proto; > + } > + > + proto_val = *proto; > + *proto = htons(ETH_P_RTAG); > + > + src = skb_mac_header(skb); > + head_len = p - src; > + > + rtag_len = sizeof(struct r_tag); > + if (skb_cow_head(skb, rtag_len) < 0) > + return -ENOMEM; > + > + skb_push(skb, rtag_len); > + skb->mac_header -= rtag_len; > + > + dst = skb_mac_header(skb); > + memmove(dst, src, head_len); > + > + rtag = (struct r_tag *)(dst + head_len); > + rtag->encap_proto = proto_val; > + rtag->sequence_nr = htons(frer_act->gen_seq_num); > + rtag->reserved = 0; > + > + return 0; > +} > + > +static void frer_rtag_pop(struct sk_buff *skb, struct tcf_frer *frer_act) > +{ > + struct vlan_ethhdr *vlanhdr; > + struct ethhdr *ethhdr; > + struct r_tag *rtag; > + int rtag_len, head_len; > + unsigned char *dst, *src, *p; > + __be16 *proto; > + > + ethhdr = (struct ethhdr *)skb_mac_header(skb); > + > + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { > + vlanhdr = (struct vlan_ethhdr *)ethhdr; > + p = (unsigned char *)(vlanhdr + 1); > + proto = &vlanhdr->h_vlan_encapsulated_proto; > + } else { > + p = (unsigned char *)(ethhdr + 1); > + proto = ðhdr->h_proto; > + } > + > + if (*proto != htons(ETH_P_RTAG)) > + return; > + > + rtag = (struct r_tag *)p; > + rtag_len = sizeof(struct r_tag); > + *proto = rtag->encap_proto; > + > + src = skb_mac_header(skb); > + head_len = p - src; > + > + skb->data = skb_mac_header(skb); > + skb_pull(skb, rtag_len); > + > + skb_reset_mac_header(skb); > + > + if (skb->ip_summed == CHECKSUM_PARTIAL) > + skb->csum_start += rtag_len; > + > + dst = skb_mac_header(skb); > + memmove(dst, src, head_len); > +} > + > +static const struct tcf_frer_proto_ops rtag_ops = { > + .encode = frer_rtag_encode, > + .decode = frer_rtag_decode, > + .tag_pop = frer_rtag_pop, > +}; > + > +static int tcf_frer_init(struct net *net, struct nlattr *nla, > + struct nlattr *est, struct tc_action **a, > + int ovr, int bind, bool rtnl_held, > + struct tcf_proto *tp, u32 flags, > + struct netlink_ext_ack *extack) > +{ > + struct tc_action_net *tn = net_generic(net, frer_net_id); > + struct nlattr *tb[TCA_FRER_MAX + 1]; > + struct tcf_chain *goto_ch = NULL; > + struct tcf_frer *frer_act; > + struct tc_frer *parm; > + int ret = 0, err, index; > + ktime_t remaining_tm; > + > + if (!nla) > + return -EINVAL; > + > + err = nla_parse_nested(tb, TCA_FRER_MAX, nla, frer_policy, extack); > + if (err < 0) > + return err; > + > + if (!tb[TCA_FRER_PARMS]) > + return -EINVAL; > + > + parm = nla_data(tb[TCA_FRER_PARMS]); > + index = parm->index; > + > + err = tcf_idr_check_alloc(tn, &index, a, bind); > + if (err < 0) > + return err; > + > + if (err && bind) > + return 0; > + > + if (!err) { > + ret = tcf_idr_create(tn, index, est, a, > + &act_frer_ops, bind, false, 0); > + > + if (ret) { > + tcf_idr_cleanup(tn, index); > + return ret; > + } > + } else if (!ovr) { > + tcf_idr_release(*a, bind); > + return -EEXIST; > + } > + > + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); > + if (err < 0) > + goto release_idr; > + > + frer_act = to_frer(*a); > + > + spin_lock_bh(&frer_act->tcf_lock); > + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); > + > + frer_act->tag_type = nla_get_u8(tb[TCA_FRER_TAG_TYPE]); > + frer_act->tag_action = nla_get_u8(tb[TCA_FRER_TAG_ACTION]); > + frer_act->recover = nla_get_u8(tb[TCA_FRER_RECOVER]); > + frer_act->rcvy_alg = nla_get_u8(tb[TCA_FRER_RECOVER_ALG]); > + frer_act->rcvy_history_len = nla_get_u8(tb[TCA_FRER_RECOVER_HISTORY_LEN]); > + frer_act->rcvy_reset_msec = nla_get_u64(tb[TCA_FRER_RECOVER_RESET_TM]); > + > + frer_act->gen_seq_num = 0; > + frer_act->seq_space = 1 << FRER_SEQ_SPACE; > + frer_act->rcvy_seq_num = 0; > + frer_act->seq_history = 0xFFFFFFFF; > + frer_act->rcvy_take_noseq = true; > + > + switch (frer_act->tag_type) { > + case TCA_FRER_TAG_RTAG: > + frer_act->proto_ops = &rtag_ops; > + break; > + case TCA_FRER_TAG_HSR: > + case TCA_FRER_TAG_PRP: > + default: > + spin_unlock_bh(&frer_act->tcf_lock); > + return -EOPNOTSUPP; > + } > + > + if (frer_act->recover && frer_act->rcvy_reset_msec) { > + hrtimer_init(&frer_act->hrtimer, CLOCK_TAI, > + HRTIMER_MODE_REL_SOFT); > + frer_act->hrtimer.function = frer_hrtimer_func; > + > + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000); > + hrtimer_start(&frer_act->hrtimer, remaining_tm, > + HRTIMER_MODE_REL_SOFT); > + } > + > + spin_unlock_bh(&frer_act->tcf_lock); > + > + if (goto_ch) > + tcf_chain_put_by_act(goto_ch); > + > + return ret; > + > +release_idr: > + tcf_idr_release(*a, bind); > + return err; > +} > + > +static void frer_seq_recovery_reset(struct tcf_frer *frer_act) > +{ > + spin_lock(&frer_act->tcf_lock); > + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG) { > + frer_act->rcvy_seq_num = frer_act->seq_space - 1; > + frer_act->seq_history = 0; > + } > + frer_act->cps_seq_rcvy_resets++; > + frer_act->take_any = true; > + spin_unlock(&frer_act->tcf_lock); > +} > + > +static void frer_shift_seq_history(int value, struct tcf_frer *frer_act) > +{ > + int history_len = frer_act->rcvy_history_len; > + > + if ((frer_act->seq_history & BIT(history_len - 1)) == 0) > + frer_act->cps_seq_rcvy_lost_pkts++; > + > + frer_act->seq_history <<= 1; > + > + if (value) > + frer_act->seq_history |= BIT(0); > +} > + > +static int frer_vector_rcvy_alg(struct tcf_frer *frer_act, int sequence, > + bool individual) > +{ > + struct hrtimer *timer = &frer_act->hrtimer; > + bool reset_timer = false; > + ktime_t remaining_tm; > + int delta, ret; > + > + if (sequence == FRER_RCVY_INVALID_SEQ) { > + frer_act->cps_seq_rcvy_tagless_pkts++; > + if (frer_act->rcvy_take_noseq) { > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } else { > + return FRER_RCVY_DISCARDED; > + } > + } > + > + delta = (sequence - frer_act->rcvy_seq_num) & (frer_act->seq_space - 1); > + /* -(RecovSeqSpace/2) <= delta <= ((RecovSeqSpace/2)-1) */ > + if (delta & (frer_act->seq_space / 2)) > + delta -= frer_act->seq_space; > + > + if (frer_act->take_any) { > + frer_act->take_any = false; > + frer_act->seq_history |= BIT(0); > + frer_act->rcvy_seq_num = sequence; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } > + > + if (delta >= frer_act->rcvy_history_len || > + delta <= -frer_act->rcvy_history_len) { > + /* Packet is out-of-range. */ > + frer_act->cps_seq_rcvy_rogue_pkts++; > + > + if (individual) > + reset_timer = true; > + > + ret = FRER_RCVY_DISCARDED; > + goto out; > + } else if (delta <= 0) { > + /* Packet is old and in SequenceHistory. */ > + if (frer_act->seq_history & BIT(-delta)) { > + if (individual) > + reset_timer = true; > + > + /* Packet has been seen. */ > + ret = FRER_RCVY_DISCARDED; > + goto out; > + } else { > + /* Packet has not been seen. */ > + frer_act->seq_history |= BIT(-delta); > + frer_act->cps_seq_rcvy_out_of_order_pkts++; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } > + } else { > + /* Packet is not too far ahead of the one we want. */ > + if (delta != 1) > + frer_act->cps_seq_rcvy_out_of_order_pkts++; > + > + while (--delta) > + frer_shift_seq_history(0, frer_act); > + frer_shift_seq_history(1, frer_act); > + frer_act->rcvy_seq_num = sequence; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } > +out: > + if (reset_timer && frer_act->rcvy_reset_msec) { > + remaining_tm = > + (ktime_t)(frer_act->rcvy_reset_msec * 1000000); > + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT); > + } > + > + return ret; > +} > + > +static int frer_match_rcvy_alg(struct tcf_frer *frer_act, int sequence, > + bool individual) > +{ > + struct hrtimer *timer = &frer_act->hrtimer; > + bool reset_timer = false; > + ktime_t remaining_tm; > + int delta, ret; > + > + if (sequence == FRER_RCVY_INVALID_SEQ) { > + frer_act->cps_seq_rcvy_tagless_pkts++; > + > + return FRER_RCVY_PASSED; > + } > + > + if (frer_act->take_any) { > + frer_act->take_any = false; > + frer_act->rcvy_seq_num = sequence; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } > + > + delta = sequence - frer_act->rcvy_seq_num; > + if (delta) { > + /* Packet has not been seen, accept it. */ > + if (delta != 1) > + frer_act->cps_seq_rcvy_out_of_order_pkts++; > + > + frer_act->rcvy_seq_num = sequence; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } else { > + if (individual) > + reset_timer = true; > + > + /* Packet has been seen. Do not forward. */ > + ret = FRER_RCVY_DISCARDED; > + goto out; > + } > + > +out: > + if (reset_timer && frer_act->rcvy_reset_msec) { > + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000); > + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT); > + } > + > + return ret; > +} > + > +static int tcf_frer_act(struct sk_buff *skb, const struct tc_action *a, > + struct tcf_result *res) > +{ > + struct tcf_frer *frer_act = to_frer(a); > + bool ingress, individual; > + int ret, retval; > + int sequence; > + > + tcf_lastuse_update(&frer_act->tcf_tm); > + tcf_action_update_bstats(&frer_act->common, skb); > + > + retval = READ_ONCE(frer_act->tcf_action); > + > + sequence = frer_act->proto_ops->decode(skb); > + > + ingress = skb_at_tc_ingress(skb); > + individual = ingress; > + > + if (frer_act->recover) { > + spin_lock(&frer_act->tcf_lock); > + > + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG) > + ret = frer_vector_rcvy_alg(frer_act, sequence, > + individual); > + else > + ret = frer_match_rcvy_alg(frer_act, sequence, > + individual); > + if (ret) { > + frer_act->tcf_qstats.drops++; > + retval = TC_ACT_SHOT; > + } > + > + if (frer_act->tag_action == TCA_FRER_TAG_POP) > + frer_act->proto_ops->tag_pop(skb, frer_act); > + > + spin_unlock(&frer_act->tcf_lock); > + > + return retval; > + } > + > + if (frer_act->tag_action == TCA_FRER_TAG_PUSH && > + sequence == FRER_RCVY_INVALID_SEQ) { > + spin_lock(&frer_act->tcf_lock); > + > + frer_seq_generation_alg(frer_act); > + > + frer_act->proto_ops->encode(skb, frer_act); > + > + spin_unlock(&frer_act->tcf_lock); > + } > + > + return retval; > +} > + > +static int tcf_frer_dump(struct sk_buff *skb, struct tc_action *a, > + int bind, int ref) > +{ > + unsigned char *b = skb_tail_pointer(skb); > + struct tcf_frer *frer_act = to_frer(a); > + struct tc_frer opt = { > + .index = frer_act->tcf_index, > + .refcnt = refcount_read(&frer_act->tcf_refcnt) - ref, > + .bindcnt = atomic_read(&frer_act->tcf_bindcnt) - bind, > + }; > + struct tcf_t t; > + > + spin_lock_bh(&frer_act->tcf_lock); > + opt.action = frer_act->tcf_action; > + > + if (nla_put(skb, TCA_FRER_PARMS, sizeof(opt), &opt)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_TAG_TYPE, frer_act->tag_type)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_TAG_ACTION, frer_act->tag_action)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_RECOVER, frer_act->recover)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_RECOVER_ALG, frer_act->rcvy_alg)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_RECOVER_HISTORY_LEN, > + frer_act->rcvy_history_len)) > + goto nla_put_failure; > + > + if (nla_put_u64_64bit(skb, TCA_FRER_RECOVER_RESET_TM, > + frer_act->rcvy_reset_msec, TCA_FRER_PAD)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_TAGLESS_PKTS, > + frer_act->cps_seq_rcvy_tagless_pkts)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS, > + frer_act->cps_seq_rcvy_out_of_order_pkts)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_ROGUE_PKTS, > + frer_act->cps_seq_rcvy_rogue_pkts)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_LOST_PKTS, > + frer_act->cps_seq_rcvy_lost_pkts)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_RESETS, > + frer_act->cps_seq_rcvy_resets)) > + goto nla_put_failure; > + > + tcf_tm_dump(&t, &frer_act->tcf_tm); > + if (nla_put_64bit(skb, TCA_FRER_TM, sizeof(t), > + &t, TCA_FRER_PAD)) > + goto nla_put_failure; > + spin_unlock_bh(&frer_act->tcf_lock); > + > + return skb->len; > + > +nla_put_failure: > + spin_unlock_bh(&frer_act->tcf_lock); > + nlmsg_trim(skb, b); > + > + return -1; > +} > + > +static int tcf_frer_walker(struct net *net, struct sk_buff *skb, > + struct netlink_callback *cb, int type, > + const struct tc_action_ops *ops, > + struct netlink_ext_ack *extack) > +{ > + struct tc_action_net *tn = net_generic(net, frer_net_id); > + > + return tcf_generic_walker(tn, skb, cb, type, ops, extack); > +} > + > +static int tcf_frer_search(struct net *net, struct tc_action **a, u32 index) > +{ > + struct tc_action_net *tn = net_generic(net, frer_net_id); > + > + return tcf_idr_search(tn, a, index); > +} > + > +static void tcf_frer_stats_update(struct tc_action *a, u64 bytes, u64 packets, > + u64 drops, u64 lastuse, bool hw) > +{ > + struct tcf_frer *frer_act = to_frer(a); > + struct tcf_t *tm = &frer_act->tcf_tm; > + > + tcf_action_update_stats(a, bytes, packets, drops, hw); > + tm->lastuse = max_t(u64, tm->lastuse, lastuse); > +} > + > +static void tcf_frer_cleanup(struct tc_action *a) > +{ > + struct tcf_frer *frer_act = to_frer(a); > + > + if (frer_act->rcvy_reset_msec) > + hrtimer_cancel(&frer_act->hrtimer); > +} > + > +static size_t tcf_frer_get_fill_size(const struct tc_action *act) > +{ > + return nla_total_size(sizeof(struct tc_frer)); > +} > + > +static struct tc_action_ops act_frer_ops = { > + .kind = "frer", > + .id = TCA_ID_FRER, > + .owner = THIS_MODULE, > + .act = tcf_frer_act, > + .init = tcf_frer_init, > + .cleanup = tcf_frer_cleanup, > + .dump = tcf_frer_dump, > + .walk = tcf_frer_walker, > + .stats_update = tcf_frer_stats_update, > + .get_fill_size = tcf_frer_get_fill_size, > + .lookup = tcf_frer_search, > + .size = sizeof(struct tcf_frer), > +}; > + > +static __net_init int frer_init_net(struct net *net) > +{ > + struct tc_action_net *tn = net_generic(net, frer_net_id); > + > + return tc_action_net_init(net, tn, &act_frer_ops); > +} > + > +static void __net_exit frer_exit_net(struct list_head *net_list) > +{ > + tc_action_net_exit(net_list, frer_net_id); > +}; > + > +static struct pernet_operations frer_net_ops = { > + .init = frer_init_net, > + .exit_batch = frer_exit_net, > + .id = &frer_net_id, > + .size = sizeof(struct tc_action_net), > +}; > + > +static int __init frer_init_module(void) > +{ > + return tcf_register_action(&act_frer_ops, &frer_net_ops); > +} > + > +static void __exit frer_cleanup_module(void) > +{ > + tcf_unregister_action(&act_frer_ops, &frer_net_ops); > +} > + > +module_init(frer_init_module); > +module_exit(frer_cleanup_module); > +MODULE_LICENSE("GPL v2"); > diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c > index 2ef8f5a6205a..353184987427 100644 > --- a/net/sched/cls_api.c > +++ b/net/sched/cls_api.c > @@ -39,6 +39,7 @@ > #include <net/tc_act/tc_ct.h> > #include <net/tc_act/tc_mpls.h> > #include <net/tc_act/tc_gate.h> > +#include <net/tc_act/tc_frer.h> > #include <net/flow_offload.h> > > extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; > @@ -3706,6 +3707,16 @@ int tc_setup_flow_action(struct flow_action *flow_action, > err = tcf_gate_get_entries(entry, act); > if (err) > goto err_out_locked; > + } else if (is_tcf_frer(act)) { > + entry->id = FLOW_ACTION_FRER; > + entry->frer.tag_type = to_frer(act)->tag_type; > + entry->frer.tag_action = to_frer(act)->tag_action; > + entry->frer.recover = to_frer(act)->recover; > + entry->frer.rcvy_alg = to_frer(act)->rcvy_alg; > + entry->frer.rcvy_history_len = > + to_frer(act)->rcvy_history_len; > + entry->frer.rcvy_reset_msec = > + to_frer(act)->rcvy_reset_msec; > } else { > err = -EOPNOTSUPP; > goto err_out_locked; > -- > 2.17.1 >
Hi Vinicius, On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote: > > This patch introduce a frer action to implement frame replication and > > elimination for reliability, which is defined in IEEE P802.1CB. > > > > An action seems, to me, a bit too limiting/fine grained for a frame replication > and elimination feature. > > At least I want to hear the reasons that the current hsr/prp support cannot be > extended to support one more tag format/protocol. > > And the current name for the spec is IEEE 802.1CB-2017. > 802.1CB can be set on bridge ports, and need to use bridge forward Function as a relay system. It only works on identified streams, unrecognized flows still need to pass through the bridged network normally. But current hsr/prp seems only support two ports, and cannot use the ports in bridge. It's hard to implement FRER functions on current HSR driver. You can see chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017, Protocol stack for relay system is like follows: Stream Transfer Function | | | Sequence generation | Sequence encode/decode Stream identification Active Stream identification | | | Internal LAN---- Relay system forwarding | | | MAC MAC MAC Use port actions to easily implement FRER tag add/delete, split, and recover functions. Current HSR/PRP driver can be used for port HSR/PRP set, and tc-frer Action to be used for stream RTAG/HSR/PRP set and recover. Thanks, Xiaoliang
Hi Vinicius, On Wed, Sep 29, 2021 at 10:25:58AM +0000, Xiaoliang Yang wrote: > Hi Vinicius, > > On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote: > > > This patch introduce a frer action to implement frame replication and > > > elimination for reliability, which is defined in IEEE P802.1CB. > > > > > > > An action seems, to me, a bit too limiting/fine grained for a frame replication > > and elimination feature. > > > > At least I want to hear the reasons that the current hsr/prp support cannot be > > extended to support one more tag format/protocol. > > > > And the current name for the spec is IEEE 802.1CB-2017. > > > 802.1CB can be set on bridge ports, and need to use bridge forward > Function as a relay system. It only works on identified streams, > unrecognized flows still need to pass through the bridged network > normally. > > But current hsr/prp seems only support two ports, and cannot use the > ports in bridge. It's hard to implement FRER functions on current HSR > driver. > > You can see chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017, > Protocol stack for relay system is like follows: > > Stream Transfer Function > | | > | Sequence generation > | Sequence encode/decode > Stream identification Active Stream identification > | | > | Internal LAN---- Relay system forwarding > | | | > MAC MAC MAC > > Use port actions to easily implement FRER tag add/delete, split, and > recover functions. > > Current HSR/PRP driver can be used for port HSR/PRP set, and tc-frer > Action to be used for stream RTAG/HSR/PRP set and recover. Did Xiaoliang answer your question satisfactorily? :)
Hi Vladimir, Vladimir Oltean <vladimir.oltean@nxp.com> writes: > Hi Vinicius, > > On Wed, Sep 29, 2021 at 10:25:58AM +0000, Xiaoliang Yang wrote: >> Hi Vinicius, >> >> On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote: >> > > This patch introduce a frer action to implement frame replication and >> > > elimination for reliability, which is defined in IEEE P802.1CB. >> > > >> > >> > An action seems, to me, a bit too limiting/fine grained for a frame replication >> > and elimination feature. >> > >> > At least I want to hear the reasons that the current hsr/prp support cannot be >> > extended to support one more tag format/protocol. >> > >> > And the current name for the spec is IEEE 802.1CB-2017. >> > >> 802.1CB can be set on bridge ports, and need to use bridge forward >> Function as a relay system. It only works on identified streams, >> unrecognized flows still need to pass through the bridged network >> normally. >> >> But current hsr/prp seems only support two ports, and cannot use the >> ports in bridge. It's hard to implement FRER functions on current HSR >> driver. >> >> You can see chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017, >> Protocol stack for relay system is like follows: >> >> Stream Transfer Function >> | | >> | Sequence generation >> | Sequence encode/decode >> Stream identification Active Stream identification >> | | >> | Internal LAN---- Relay system forwarding >> | | | >> MAC MAC MAC >> >> Use port actions to easily implement FRER tag add/delete, split, and >> recover functions. >> >> Current HSR/PRP driver can be used for port HSR/PRP set, and tc-frer >> Action to be used for stream RTAG/HSR/PRP set and recover. > > Did Xiaoliang answer your question satisfactorily? :) Oh, yes, the answer was very good. I was taking some time to read the 802.1CB spec, and try to think how things would fit together so I can ask better questions next time :-) Cheers,
Xiaoliang Yang <xiaoliang.yang_1@nxp.com> writes: > Hi Vinicius, > > On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote: >> > This patch introduce a frer action to implement frame replication and >> > elimination for reliability, which is defined in IEEE P802.1CB. >> > >> >> An action seems, to me, a bit too limiting/fine grained for a frame replication >> and elimination feature. >> >> At least I want to hear the reasons that the current hsr/prp support cannot be >> extended to support one more tag format/protocol. >> >> And the current name for the spec is IEEE 802.1CB-2017. >> > 802.1CB can be set on bridge ports, and need to use bridge forward > Function as a relay system. It only works on identified streams, > unrecognized flows still need to pass through the bridged network > normally. This ("only on identified streams") is the strongest argument so far to have FRER also as an action, in adition to the current hsr netdevice approach. > > But current hsr/prp seems only support two ports, and cannot use the > ports in bridge. It's hard to implement FRER functions on current HSR > driver. That the hsr netdevice only support two ports, I think is more a bug than a design issue. Which will need to get fixed at some point. Speaking of functions, one thing that might be interesting is trying to see if it makes sense to make part of the current hsr functionality a "library" so it can be used by tc-frer as well. (less duplication of bugs). > > You can see chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017, > Protocol stack for relay system is like follows: > > Stream Transfer Function > | | > | Sequence generation > | Sequence encode/decode > Stream identification Active Stream identification > | | > | Internal LAN---- Relay system forwarding > | | | > MAC MAC MAC > > Use port actions to easily implement FRER tag add/delete, split, and > recover functions. > > Current HSR/PRP driver can be used for port HSR/PRP set, and tc-frer > Action to be used for stream RTAG/HSR/PRP set and recover. I am still reading the spec and trying to imagine how things would fit together: - for which use cases tc-frer would be useful; - for which use cases the hsr netdevice would be useful; - would it make sense to have them in the same system? > > Thanks, > Xiaoliang Cheers,
On Fri, Oct 01, 2021 at 10:27:12AM -0700, Vinicius Costa Gomes wrote: > Xiaoliang Yang <xiaoliang.yang_1@nxp.com> writes: > > > Hi Vinicius, > > > > On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote: > >> > This patch introduce a frer action to implement frame replication and > >> > elimination for reliability, which is defined in IEEE P802.1CB. > >> > > >> > >> An action seems, to me, a bit too limiting/fine grained for a frame replication > >> and elimination feature. > >> > >> At least I want to hear the reasons that the current hsr/prp support cannot be > >> extended to support one more tag format/protocol. > >> > >> And the current name for the spec is IEEE 802.1CB-2017. > >> > > 802.1CB can be set on bridge ports, and need to use bridge forward > > Function as a relay system. It only works on identified streams, > > unrecognized flows still need to pass through the bridged network > > normally. > > This ("only on identified streams") is the strongest argument so far to > have FRER also as an action, in adition to the current hsr netdevice > approach. > > > > > But current hsr/prp seems only support two ports, and cannot use the > > ports in bridge. It's hard to implement FRER functions on current HSR > > driver. > > That the hsr netdevice only support two ports, I think is more a bug > than a design issue. Which will need to get fixed at some point. What do you mean 'a bug'? HSR and PRP, as protocols, use _two_ ports, see IEC 62439-3, that's where the "D" (doubly attached node) in DANH and DANP comes from. There's no TANH/TANH for "triply attached node". It doesn't scale. > Speaking of functions, one thing that might be interesting is trying to > see if it makes sense to make part of the current hsr functionality a > "library" so it can be used by tc-frer as well. (less duplication of > bugs). You mean tc-frer should inherit from the get-go the plethora of bugs from the unmaintained hsr driver? :) That would be good for hsr, which is in a pretty poor state, but the design of the 802.1CB spec isn't really in its favor sadly. > > > > You can see chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017, > > Protocol stack for relay system is like follows: > > > > Stream Transfer Function > > | | > > | Sequence generation > > | Sequence encode/decode > > Stream identification Active Stream identification > > | | > > | Internal LAN---- Relay system forwarding > > | | | > > MAC MAC MAC > > > > Use port actions to easily implement FRER tag add/delete, split, and > > recover functions. > > > > Current HSR/PRP driver can be used for port HSR/PRP set, and tc-frer > > Action to be used for stream RTAG/HSR/PRP set and recover. > > I am still reading the spec and trying to imagine how things would fit > together: > - for which use cases tc-frer would be useful; > - for which use cases the hsr netdevice would be useful; > - would it make sense to have them in the same system? You could use FRER in networks where normally you'd use HSR (aka rings). In fact the 802.1CB demonstration I have, which uses the NXP tsntool program with the downstream genetlink tsn interface, does exactly that: https://github.com/vladimiroltean/tsn-scripts Basically FRER is IEEE's take on redundancy protocols and more like a generalization of HSR/PRP, the big changes are: - not limited to two (or any number of) ports - more than one type of stream/flow identification function: can look at source/destination MAC, source/destination IP, VLAN, and most importantly, there can be passive stream identification functions (don't modify the packet) and active stream identification functions (do modify the packet). Please note that we've already started modeling IEEE 802.1CB stream identification functions as tc flower filters, since those map nicely on top. We use these for PSFP (former 802.1Qci) tc-police and tc-gate actions (yes, tc-police is single-bucket and color-unaware, that needs to be improved). Basically IEEE 802.1CB is a huge toolbox, the spec gives you the tools but it doesn't tell you how to use them, that's why the stream identification functions are so generic and decoupled from the redundancy protocol itself. In both HSR and PRP, sequence numbers are kept per source MAC address, that is absolutely baken into the standard. But think about this. When the sequence number is kept per source station, frames sent from node A to multiple destinations (nodes B and C) will be part of the same stream. So nodes B and C will see discontinuities in the sequence numbers when node A talks to them. The opposite is true as well. When sequence numbers are kept per destination MAC address, then frames sent from multiple talkers (nodes A and B) to the same destination (node C) will be interpreted as part of the same stream by the listener. So there will be jumps in sequence numbers seen by C when A and B are simultaneously transmitting to it. Which type of stream identification you need depends on the traffic you need to support, and the topology. So again, IEEE 802.1CB doesn't tell you what to do, but it gives you the tools. You can do source MAC based stream identification, and you can emulate HSR, or you can do something that encompasses both source node information as well as destination node information. It's one whole degree of freedom more flexible, plain and simple. And the topologies are not limited to: - the rings that HSR supports - the disjoint IP networks that PRP supports but are rather generic graphs. I fully expect there to be hardware out there already that can convert between the HSR/PRP frame format on one set of ports to 802.1CB frame format on another set of ports. Maybe that's something that some thought needs to be put into.
On Fri, Oct 01, 2021 at 08:55:24PM +0300, Vladimir Oltean wrote: > On Fri, Oct 01, 2021 at 10:27:12AM -0700, Vinicius Costa Gomes wrote: > > Xiaoliang Yang <xiaoliang.yang_1@nxp.com> writes: > > > > > Hi Vinicius, > > > > > > On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote: > > >> > This patch introduce a frer action to implement frame replication and > > >> > elimination for reliability, which is defined in IEEE P802.1CB. > > >> > > > >> > > >> An action seems, to me, a bit too limiting/fine grained for a frame replication > > >> and elimination feature. > > >> > > >> At least I want to hear the reasons that the current hsr/prp support cannot be > > >> extended to support one more tag format/protocol. > > >> > > >> And the current name for the spec is IEEE 802.1CB-2017. > > >> > > > 802.1CB can be set on bridge ports, and need to use bridge forward > > > Function as a relay system. It only works on identified streams, > > > unrecognized flows still need to pass through the bridged network > > > normally. > > > > This ("only on identified streams") is the strongest argument so far to > > have FRER also as an action, in adition to the current hsr netdevice > > approach. > > > > > > > > But current hsr/prp seems only support two ports, and cannot use the > > > ports in bridge. It's hard to implement FRER functions on current HSR > > > driver. > > > > That the hsr netdevice only support two ports, I think is more a bug > > than a design issue. Which will need to get fixed at some point. > > What do you mean 'a bug'? HSR and PRP, as protocols, use _two_ ports, > see IEC 62439-3, that's where the "D" (doubly attached node) in DANH and > DANP comes from. There's no TANH/TANH for "triply attached node". > It doesn't scale. > > > Speaking of functions, one thing that might be interesting is trying to > > see if it makes sense to make part of the current hsr functionality a > > "library" so it can be used by tc-frer as well. (less duplication of > > bugs). > > You mean tc-frer should inherit from the get-go the plethora of bugs > from the unmaintained hsr driver? :) > > That would be good for hsr, which is in a pretty poor state, but the > design of the 802.1CB spec isn't really in its favor sadly. > > > > > > > You can see chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017, > > > Protocol stack for relay system is like follows: > > > > > > Stream Transfer Function > > > | | > > > | Sequence generation > > > | Sequence encode/decode > > > Stream identification Active Stream identification > > > | | > > > | Internal LAN---- Relay system forwarding > > > | | | > > > MAC MAC MAC > > > > > > Use port actions to easily implement FRER tag add/delete, split, and > > > recover functions. > > > > > > Current HSR/PRP driver can be used for port HSR/PRP set, and tc-frer > > > Action to be used for stream RTAG/HSR/PRP set and recover. > > > > I am still reading the spec and trying to imagine how things would fit > > together: > > - for which use cases tc-frer would be useful; > > - for which use cases the hsr netdevice would be useful; > > - would it make sense to have them in the same system? > > You could use FRER in networks where normally you'd use HSR (aka rings). > In fact the 802.1CB demonstration I have, which uses the NXP tsntool > program with the downstream genetlink tsn interface, does exactly that: > https://github.com/vladimiroltean/tsn-scripts > > Basically FRER is IEEE's take on redundancy protocols and more like a > generalization of HSR/PRP, the big changes are: > - not limited to two (or any number of) ports > - more than one type of stream/flow identification function: can look at > source/destination MAC, source/destination IP, VLAN, and most > importantly, there can be passive stream identification functions (don't > modify the packet) and active stream identification functions (do > modify the packet). > > Please note that we've already started modeling IEEE 802.1CB stream > identification functions as tc flower filters, since those map nicely on top. > We use these for PSFP (former 802.1Qci) tc-police and tc-gate actions > (yes, tc-police is single-bucket and color-unaware, that needs to be improved). > > Basically IEEE 802.1CB is a huge toolbox, the spec gives you the tools > but it doesn't tell you how to use them, that's why the stream > identification functions are so generic and decoupled from the > redundancy protocol itself. > > In both HSR and PRP, sequence numbers are kept per source MAC address, > that is absolutely baken into the standard. > > But think about this. When the sequence number is kept per source > station, frames sent from node A to multiple destinations (nodes B and C) > will be part of the same stream. So nodes B and C will see > discontinuities in the sequence numbers when node A talks to them. > > The opposite is true as well. When sequence numbers are kept per > destination MAC address, then frames sent from multiple talkers (nodes A > and B) to the same destination (node C) will be interpreted as part of > the same stream by the listener. So there will be jumps in sequence > numbers seen by C when A and B are simultaneously transmitting to it. > > Which type of stream identification you need depends on the traffic you > need to support, and the topology. > > So again, IEEE 802.1CB doesn't tell you what to do, but it gives you the > tools. You can do source MAC based stream identification, and you can > emulate HSR, or you can do something that encompasses both source node > information as well as destination node information. > > It's one whole degree of freedom more flexible, plain and simple. > And the topologies are not limited to: > - the rings that HSR supports > - the disjoint IP networks that PRP supports > but are rather generic graphs. > > I fully expect there to be hardware out there already that can convert > between the HSR/PRP frame format on one set of ports to 802.1CB frame > format on another set of ports. Maybe that's something that some thought > needs to be put into. And Xiaoliang, can you PLEASE remove the following email addresses from further submissions you make: andre.guedes@linux.intel.com vishal@chelsio.com ivan.khoronzhuk@linaro.org m-karicheri2@ti.com Arvid.Brodin@xdin.com You also copied some of them on all 6 submissions for the PSFP offload series. It gets really annoying to get email bounces from these addresses. I've removed them from this email thread. Thanks.
Hi, Vladimir Oltean <vladimir.oltean@nxp.com> writes: > On Fri, Oct 01, 2021 at 10:27:12AM -0700, Vinicius Costa Gomes wrote: >> Xiaoliang Yang <xiaoliang.yang_1@nxp.com> writes: >> >> > Hi Vinicius, >> > >> > On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote: >> >> > This patch introduce a frer action to implement frame replication and >> >> > elimination for reliability, which is defined in IEEE P802.1CB. >> >> > >> >> >> >> An action seems, to me, a bit too limiting/fine grained for a frame replication >> >> and elimination feature. >> >> >> >> At least I want to hear the reasons that the current hsr/prp support cannot be >> >> extended to support one more tag format/protocol. >> >> >> >> And the current name for the spec is IEEE 802.1CB-2017. >> >> >> > 802.1CB can be set on bridge ports, and need to use bridge forward >> > Function as a relay system. It only works on identified streams, >> > unrecognized flows still need to pass through the bridged network >> > normally. >> >> This ("only on identified streams") is the strongest argument so far to >> have FRER also as an action, in adition to the current hsr netdevice >> approach. >> >> > >> > But current hsr/prp seems only support two ports, and cannot use the >> > ports in bridge. It's hard to implement FRER functions on current HSR >> > driver. >> >> That the hsr netdevice only support two ports, I think is more a bug >> than a design issue. Which will need to get fixed at some point. > > What do you mean 'a bug'? HSR and PRP, as protocols, use _two_ ports, > see IEC 62439-3, that's where the "D" (doubly attached node) in DANH and > DANP comes from. There's no TANH/TANH for "triply attached node". > It doesn't scale. First of all, thank you for taking the time to write such detailed answer, really helpful. Another spec that I should take some time and read if I want to keep commenting on this stuff. > >> Speaking of functions, one thing that might be interesting is trying to >> see if it makes sense to make part of the current hsr functionality a >> "library" so it can be used by tc-frer as well. (less duplication of >> bugs). > > You mean tc-frer should inherit from the get-go the plethora of bugs > from the unmaintained hsr driver? :) > > That would be good for hsr, which is in a pretty poor state, but the > design of the 802.1CB spec isn't really in its favor sadly. > Fair enough. So what I am going to suggest is for you folks to write in the RFC how to use tc-frer (the "toolbox" idea) in "IEC 62439-9 mode", not necessary to implement it, just to write it down. The idea is that we have a path forward to better maintained alternatives, as you said, if we stop recommending people to use/experiment with net/hsr. >> > >> > You can see chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017, >> > Protocol stack for relay system is like follows: >> > >> > Stream Transfer Function >> > | | >> > | Sequence generation >> > | Sequence encode/decode >> > Stream identification Active Stream identification >> > | | >> > | Internal LAN---- Relay system forwarding >> > | | | >> > MAC MAC MAC >> > >> > Use port actions to easily implement FRER tag add/delete, split, and >> > recover functions. >> > >> > Current HSR/PRP driver can be used for port HSR/PRP set, and tc-frer >> > Action to be used for stream RTAG/HSR/PRP set and recover. >> >> I am still reading the spec and trying to imagine how things would fit >> together: >> - for which use cases tc-frer would be useful; >> - for which use cases the hsr netdevice would be useful; >> - would it make sense to have them in the same system? > > You could use FRER in networks where normally you'd use HSR (aka rings). > In fact the 802.1CB demonstration I have, which uses the NXP tsntool > program with the downstream genetlink tsn interface, does exactly that: > https://github.com/vladimiroltean/tsn-scripts > After a very quick look, interesting stuff here. Will take a better look. (even more reading for the weekend) > Basically FRER is IEEE's take on redundancy protocols and more like a > generalization of HSR/PRP, the big changes are: > - not limited to two (or any number of) ports > - more than one type of stream/flow identification function: can look at > source/destination MAC, source/destination IP, VLAN, and most > importantly, there can be passive stream identification functions (don't > modify the packet) and active stream identification functions (do > modify the packet). > > Please note that we've already started modeling IEEE 802.1CB stream > identification functions as tc flower filters, since those map nicely on top. > We use these for PSFP (former 802.1Qci) tc-police and tc-gate actions > (yes, tc-police is single-bucket and color-unaware, that needs to be improved). > > Basically IEEE 802.1CB is a huge toolbox, the spec gives you the tools > but it doesn't tell you how to use them, that's why the stream > identification functions are so generic and decoupled from the > redundancy protocol itself. > > In both HSR and PRP, sequence numbers are kept per source MAC address, > that is absolutely baken into the standard. > > But think about this. When the sequence number is kept per source > station, frames sent from node A to multiple destinations (nodes B and C) > will be part of the same stream. So nodes B and C will see > discontinuities in the sequence numbers when node A talks to them. > > The opposite is true as well. When sequence numbers are kept per > destination MAC address, then frames sent from multiple talkers (nodes A > and B) to the same destination (node C) will be interpreted as part of > the same stream by the listener. So there will be jumps in sequence > numbers seen by C when A and B are simultaneously transmitting to it. > > Which type of stream identification you need depends on the traffic you > need to support, and the topology. Good insight here. Even if I can imagine those simple stream identification functions working on simple topologies, I totally get you point. > > So again, IEEE 802.1CB doesn't tell you what to do, but it gives you the > tools. You can do source MAC based stream identification, and you can > emulate HSR, or you can do something that encompasses both source node > information as well as destination node information. > > It's one whole degree of freedom more flexible, plain and simple. > And the topologies are not limited to: > - the rings that HSR supports > - the disjoint IP networks that PRP supports > but are rather generic graphs. > > I fully expect there to be hardware out there already that can convert > between the HSR/PRP frame format on one set of ports to 802.1CB frame > format on another set of ports. Maybe that's something that some thought > needs to be put into. In short, I am reasonably satisfied with the proposal that tc-frer offers a superset of net/hsr can do. Suggestions for the cover letter: - Expand a bit on the whole superset/toolbox idea; - Document how to use the toolbox to emulate HSR/PRP; Cheers,
Xiaoliang Yang <xiaoliang.yang_1@nxp.com> writes: > This patch introduce a frer action to implement frame replication and > elimination for reliability, which is defined in IEEE P802.1CB. > > There are two modes for frer action: generate and push the tag, recover > and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This > patch only supports RTAG now. > > User can push the tag on egress port of the talker device, recover and > pop the tag on ingress port of the listener device. When it's a relay > system, push the tag on ingress port, or set individual recover on > ingress port. Set the sequence recover on egress port. > > Use action "mirred" to do split function, and use "vlan-modify" to do > active stream identification function on relay system. > > Below is the setting example in user space: > push rtag on relay system: > > tc qdisc add dev swp0 clsact > > tc filter add dev swp0 ingress protocol 802.1Q flower \ > skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \ > action frer rtag tag-action tag-push > > split stream: > > tc filter add dev swp0 ingress protocol 802.1Q flower \ > skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \ > action mirred egress mirror dev swp1 > > individual recover: > > tc filter add dev swp0 ingress protocol 802.1Q flower > skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \ > action frer rtag recover \ > alg vector history-length 32 reset-time 10000 > > recover and pop rtag: > > tc filter add dev swp0 egress protocol 802.1Q flower > skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \ > action frer rtag recover \ > alg vector history-length 32 reset-time 10000 \ > tag-action tag-pop > > Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com> > --- > include/net/flow_offload.h | 9 + > include/net/tc_act/tc_frer.h | 52 +++ > include/uapi/linux/if_ether.h | 1 + > include/uapi/linux/pkt_cls.h | 1 + > include/uapi/linux/tc_act/tc_frer.h | 50 ++ > net/sched/Kconfig | 13 + > net/sched/Makefile | 1 + > net/sched/act_frer.c | 695 ++++++++++++++++++++++++++++ > net/sched/cls_api.c | 11 + > 9 files changed, 833 insertions(+) > create mode 100644 include/net/tc_act/tc_frer.h > create mode 100644 include/uapi/linux/tc_act/tc_frer.h > create mode 100644 net/sched/act_frer.c > > diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h > index 3961461d9c8b..cfa9b69cec69 100644 > --- a/include/net/flow_offload.h > +++ b/include/net/flow_offload.h > @@ -148,6 +148,7 @@ enum flow_action_id { > FLOW_ACTION_MPLS_MANGLE, > FLOW_ACTION_GATE, > FLOW_ACTION_PPPOE_PUSH, > + FLOW_ACTION_FRER, > NUM_FLOW_ACTIONS, > }; > > @@ -278,6 +279,14 @@ struct flow_action_entry { > struct { /* FLOW_ACTION_PPPOE_PUSH */ > u16 sid; > } pppoe; > + struct { > + u8 tag_type; > + u8 tag_action; > + u8 recover; > + u8 rcvy_alg; > + u8 rcvy_history_len; > + u8 rcvy_reset_msec; Optional: it wasn't clear until I took a closer look at the code that "rcvy" means "recovery" in this context. Perhaps write "recovery" in full would make it clearer? > + } frer; > }; > struct flow_action_cookie *cookie; /* user defined action cookie */ > }; > diff --git a/include/net/tc_act/tc_frer.h b/include/net/tc_act/tc_frer.h > new file mode 100644 > index 000000000000..b2ad2b2a3fe1 > --- /dev/null > +++ b/include/net/tc_act/tc_frer.h > @@ -0,0 +1,52 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > +/* Copyright 2021 NXP */ > + > +#ifndef __NET_TC_FRER_H > +#define __NET_TC_FRER_H > + > +#include <net/act_api.h> > +#include <linux/tc_act/tc_frer.h> > + > +struct tcf_frer; > + > +struct tcf_frer_proto_ops { > + int (*encode)(struct sk_buff *skb, struct tcf_frer *frer_act); > + int (*decode)(struct sk_buff *skb); > + void (*tag_pop)(struct sk_buff *skb, struct tcf_frer *frer_act); > +}; > + > +struct tcf_frer { > + struct tc_action common; > + u8 tag_type; > + u8 tag_action; > + u8 recover; > + u8 rcvy_alg; > + u8 rcvy_history_len; > + u64 rcvy_reset_msec; > + u32 gen_seq_num; > + u32 rcvy_seq_num; > + u64 seq_space; > + u32 seq_history; > + bool take_any; > + bool rcvy_take_noseq; > + u32 cps_seq_rcvy_lost_pkts; > + u32 cps_seq_rcvy_tagless_pkts; > + u32 cps_seq_rcvy_out_of_order_pkts; > + u32 cps_seq_rcvy_rogue_pkts; > + u32 cps_seq_rcvy_resets; > + struct hrtimer hrtimer; > + const struct tcf_frer_proto_ops *proto_ops; > +}; > + > +#define to_frer(a) ((struct tcf_frer *)a) > + > +static inline bool is_tcf_frer(const struct tc_action *a) > +{ > +#ifdef CONFIG_NET_CLS_ACT > + if (a->ops && a->ops->id == TCA_ID_FRER) > + return true; > +#endif > + return false; > +} > + > +#endif > diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h > index 5f589c7a8382..812aa75f7f23 100644 > --- a/include/uapi/linux/if_ether.h > +++ b/include/uapi/linux/if_ether.h > @@ -114,6 +114,7 @@ > #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ > #define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ > #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ > +#define ETH_P_RTAG 0xF1C1 /* Redundancy Tag(IEEE 802.1CB) */ > #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ > > #define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value > diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h > index 6836ccb9c45d..a3fc0c478a65 100644 > --- a/include/uapi/linux/pkt_cls.h > +++ b/include/uapi/linux/pkt_cls.h > @@ -136,6 +136,7 @@ enum tca_id { > TCA_ID_MPLS, > TCA_ID_CT, > TCA_ID_GATE, > + TCA_ID_FRER, > /* other actions go here */ > __TCA_ID_MAX = 255 > }; > diff --git a/include/uapi/linux/tc_act/tc_frer.h b/include/uapi/linux/tc_act/tc_frer.h > new file mode 100644 > index 000000000000..cd86274483e7 > --- /dev/null > +++ b/include/uapi/linux/tc_act/tc_frer.h > @@ -0,0 +1,50 @@ > +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ > +/* Copyright 2021 NXP */ > + > +#ifndef __LINUX_TC_FRER_H > +#define __LINUX_TC_FRER_H > + > +#include <linux/pkt_cls.h> > + > +struct tc_frer { > + tc_gen; > +}; > + > +enum { > + TCA_FRER_UNSPEC, > + TCA_FRER_TM, > + TCA_FRER_PARMS, > + TCA_FRER_PAD, > + TCA_FRER_TAG_TYPE, > + TCA_FRER_TAG_ACTION, > + TCA_FRER_RECOVER, > + TCA_FRER_RECOVER_ALG, > + TCA_FRER_RECOVER_HISTORY_LEN, > + TCA_FRER_RECOVER_RESET_TM, > + TCA_FRER_RECOVER_TAGLESS_PKTS, > + TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS, > + TCA_FRER_RECOVER_ROGUE_PKTS, > + TCA_FRER_RECOVER_LOST_PKTS, > + TCA_FRER_RECOVER_RESETS, > + __TCA_FRER_MAX, > +}; > +#define TCA_FRER_MAX (__TCA_FRER_MAX - 1) > + > +enum tc_frer_tag_action { > + TCA_FRER_TAG_NULL, > + TCA_FRER_TAG_PUSH, > + TCA_FRER_TAG_POP, > +}; > + > +enum tc_frer_tag_type { > + TCA_FRER_TAG_RTAG, > + TCA_FRER_TAG_HSR, > + TCA_FRER_TAG_PRP, > +}; > + > +enum tc_frer_rcvy_alg { > + TCA_FRER_RCVY_VECTOR_ALG, > + TCA_FRER_RCVY_MATCH_ALG, > +}; > + > +#endif > diff --git a/net/sched/Kconfig b/net/sched/Kconfig > index 1e8ab4749c6c..93e2687042c2 100644 > --- a/net/sched/Kconfig > +++ b/net/sched/Kconfig > @@ -997,6 +997,19 @@ config NET_ACT_GATE > To compile this code as a module, choose M here: the > module will be called act_gate. > > +config NET_ACT_FRER > + tristate "Frame frer tc action" > + depends on NET_CLS_ACT > + help > + Say Y here to support frame replication and elimination for > + reliability, which is defined by IEEE 802.1CB. > + This action allow to add a frer tag. It also allow to remove > + the frer tag and drop repeat frames. > + > + If unsure, say N. > + To compile this code as a module, choose M here: the > + module will be called act_frer. > + > config NET_IFE_SKBMARK > tristate "Support to encoding decoding skb mark on IFE action" > depends on NET_ACT_IFE > diff --git a/net/sched/Makefile b/net/sched/Makefile > index dd14ef413fda..69e7e94be567 100644 > --- a/net/sched/Makefile > +++ b/net/sched/Makefile > @@ -32,6 +32,7 @@ obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o > obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o > obj-$(CONFIG_NET_ACT_CT) += act_ct.o > obj-$(CONFIG_NET_ACT_GATE) += act_gate.o > +obj-$(CONFIG_NET_ACT_FRER) += act_frer.o > obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o > obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o > obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o > diff --git a/net/sched/act_frer.c b/net/sched/act_frer.c > new file mode 100644 > index 000000000000..6f8ec5782d3d > --- /dev/null > +++ b/net/sched/act_frer.c > @@ -0,0 +1,695 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* Copyright 2021 NXP */ > + > +#include <linux/module.h> > +#include <linux/types.h> > +#include <linux/kernel.h> > +#include <linux/string.h> > +#include <linux/errno.h> > +#include <linux/skbuff.h> > +#include <linux/rtnetlink.h> > +#include <linux/init.h> > +#include <linux/slab.h> > +#include <net/act_api.h> > +#include <net/netlink.h> > +#include <net/pkt_cls.h> > +#include <net/tc_act/tc_frer.h> > + > +#define FRER_SEQ_SPACE 16 > +#define FRER_RCVY_RESET_MSEC 100 > +#define FRER_RCVY_INVALID_SEQ 0x100 > +#define FRER_RCVY_PASSED 0 > +#define FRER_RCVY_DISCARDED -1 > + > +static unsigned int frer_net_id; > +static struct tc_action_ops act_frer_ops; > + > +struct r_tag { > + __be16 reserved; > + __be16 sequence_nr; > + __be16 encap_proto; > +} __packed; > + > +struct rtag_ethhdr { > + struct ethhdr ethhdr; > + struct r_tag h_rtag; > +} __packed; > + > +struct rtag_vlan_ethhdr { > + struct vlan_ethhdr vlanhdr; > + struct r_tag h_rtag; > +} __packed; > + > +static const struct nla_policy frer_policy[TCA_FRER_MAX + 1] = { > + [TCA_FRER_PARMS] = > + NLA_POLICY_EXACT_LEN(sizeof(struct tc_frer)), > + [TCA_FRER_TAG_TYPE] = { .type = NLA_U8 }, > + [TCA_FRER_TAG_ACTION] = { .type = NLA_U8 }, > + [TCA_FRER_RECOVER] = { .type = NLA_U8 }, > + [TCA_FRER_RECOVER_ALG] = { .type = NLA_U8 }, > + [TCA_FRER_RECOVER_HISTORY_LEN] = { .type = NLA_U8 }, > + [TCA_FRER_RECOVER_RESET_TM] = { .type = NLA_U64 }, > +}; > + > +static void frer_seq_recovery_reset(struct tcf_frer *frer_act); > + > +static enum hrtimer_restart frer_hrtimer_func(struct hrtimer *timer) > +{ > + struct tcf_frer *frer_act = container_of(timer, struct tcf_frer, > + hrtimer); > + ktime_t remaining_tm; > + > + frer_seq_recovery_reset(frer_act); > + > + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000); I think using ms_to_ktime() would be more readable. There are a few other places where this suggestion applies. > + > + hrtimer_forward(timer, timer->base->get_time(), remaining_tm); > + > + return HRTIMER_RESTART; > +} > + > +static int frer_rtag_decode(struct sk_buff *skb) > +{ > + struct rtag_vlan_ethhdr *rtag_vlan_hdr; > + struct rtag_ethhdr *rtag_hdr; > + struct vlan_ethhdr *vlanhdr; > + struct ethhdr *ethhdr; > + struct r_tag *rtag; > + bool is_vlan; > + u16 sequence; > + u16 proto; > + > + ethhdr = (struct ethhdr *)skb_mac_header(skb); > + proto = ethhdr->h_proto; > + is_vlan = false; > + > + if (proto == htons(ETH_P_8021Q)) { > + vlanhdr = (struct vlan_ethhdr *)ethhdr; > + proto = vlanhdr->h_vlan_encapsulated_proto; > + is_vlan = true; > + } > + > + if (proto != htons(ETH_P_RTAG)) > + return FRER_RCVY_INVALID_SEQ; > + > + if (is_vlan) { > + rtag_vlan_hdr = (struct rtag_vlan_ethhdr *)ethhdr; > + rtag = &rtag_vlan_hdr->h_rtag; > + } else { > + rtag_hdr = (struct rtag_ethhdr *)ethhdr; > + rtag = &rtag_hdr->h_rtag; > + } > + > + sequence = ntohs(rtag->sequence_nr); > + > + return sequence; > +} > + > +static int frer_seq_generation_alg(struct tcf_frer *frer_act) > +{ > + u32 gen_seq_max = frer_act->seq_space - 1; > + u32 gen_seq_num = frer_act->gen_seq_num; > + int sequence_number; > + > + sequence_number = gen_seq_num; > + > + if (gen_seq_num >= gen_seq_max) > + gen_seq_num = 0; > + else > + gen_seq_num++; > + > + frer_act->gen_seq_num = gen_seq_num; > + > + return sequence_number; > +} > + > +static int frer_rtag_encode(struct sk_buff *skb, struct tcf_frer *frer_act) > +{ > + struct vlan_ethhdr *vlanhdr; > + struct ethhdr *ethhdr; > + struct r_tag *rtag; > + int rtag_len, head_len; > + unsigned char *dst, *src, *p; > + __be16 *proto, proto_val; > + > + ethhdr = (struct ethhdr *)skb_mac_header(skb); > + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { > + vlanhdr = (struct vlan_ethhdr *)ethhdr; > + p = (unsigned char *)(vlanhdr + 1); > + proto = &vlanhdr->h_vlan_encapsulated_proto; > + } else { > + p = (unsigned char *)(ethhdr + 1); > + proto = ðhdr->h_proto; > + } > + > + proto_val = *proto; > + *proto = htons(ETH_P_RTAG); > + > + src = skb_mac_header(skb); > + head_len = p - src; > + > + rtag_len = sizeof(struct r_tag); > + if (skb_cow_head(skb, rtag_len) < 0) > + return -ENOMEM; > + > + skb_push(skb, rtag_len); > + skb->mac_header -= rtag_len; > + > + dst = skb_mac_header(skb); > + memmove(dst, src, head_len); > + > + rtag = (struct r_tag *)(dst + head_len); > + rtag->encap_proto = proto_val; > + rtag->sequence_nr = htons(frer_act->gen_seq_num); > + rtag->reserved = 0; > + > + return 0; > +} > + > +static void frer_rtag_pop(struct sk_buff *skb, struct tcf_frer *frer_act) > +{ > + struct vlan_ethhdr *vlanhdr; > + struct ethhdr *ethhdr; > + struct r_tag *rtag; > + int rtag_len, head_len; > + unsigned char *dst, *src, *p; > + __be16 *proto; > + > + ethhdr = (struct ethhdr *)skb_mac_header(skb); > + > + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { > + vlanhdr = (struct vlan_ethhdr *)ethhdr; > + p = (unsigned char *)(vlanhdr + 1); > + proto = &vlanhdr->h_vlan_encapsulated_proto; > + } else { > + p = (unsigned char *)(ethhdr + 1); > + proto = ðhdr->h_proto; > + } > + > + if (*proto != htons(ETH_P_RTAG)) > + return; > + > + rtag = (struct r_tag *)p; > + rtag_len = sizeof(struct r_tag); > + *proto = rtag->encap_proto; > + > + src = skb_mac_header(skb); > + head_len = p - src; > + > + skb->data = skb_mac_header(skb); > + skb_pull(skb, rtag_len); > + > + skb_reset_mac_header(skb); > + > + if (skb->ip_summed == CHECKSUM_PARTIAL) > + skb->csum_start += rtag_len; > + > + dst = skb_mac_header(skb); > + memmove(dst, src, head_len); > +} > + > +static const struct tcf_frer_proto_ops rtag_ops = { > + .encode = frer_rtag_encode, > + .decode = frer_rtag_decode, > + .tag_pop = frer_rtag_pop, > +}; > + > +static int tcf_frer_init(struct net *net, struct nlattr *nla, > + struct nlattr *est, struct tc_action **a, > + int ovr, int bind, bool rtnl_held, > + struct tcf_proto *tp, u32 flags, > + struct netlink_ext_ack *extack) > +{ > + struct tc_action_net *tn = net_generic(net, frer_net_id); > + struct nlattr *tb[TCA_FRER_MAX + 1]; > + struct tcf_chain *goto_ch = NULL; > + struct tcf_frer *frer_act; > + struct tc_frer *parm; > + int ret = 0, err, index; > + ktime_t remaining_tm; > + > + if (!nla) > + return -EINVAL; > + > + err = nla_parse_nested(tb, TCA_FRER_MAX, nla, frer_policy, extack); > + if (err < 0) > + return err; > + > + if (!tb[TCA_FRER_PARMS]) > + return -EINVAL; > + > + parm = nla_data(tb[TCA_FRER_PARMS]); > + index = parm->index; > + > + err = tcf_idr_check_alloc(tn, &index, a, bind); > + if (err < 0) > + return err; > + > + if (err && bind) > + return 0; > + > + if (!err) { > + ret = tcf_idr_create(tn, index, est, a, > + &act_frer_ops, bind, false, 0); > + > + if (ret) { > + tcf_idr_cleanup(tn, index); > + return ret; > + } > + } else if (!ovr) { > + tcf_idr_release(*a, bind); > + return -EEXIST; > + } > + > + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); > + if (err < 0) > + goto release_idr; > + > + frer_act = to_frer(*a); > + > + spin_lock_bh(&frer_act->tcf_lock); > + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); > + > + frer_act->tag_type = nla_get_u8(tb[TCA_FRER_TAG_TYPE]); > + frer_act->tag_action = nla_get_u8(tb[TCA_FRER_TAG_ACTION]); > + frer_act->recover = nla_get_u8(tb[TCA_FRER_RECOVER]); > + frer_act->rcvy_alg = nla_get_u8(tb[TCA_FRER_RECOVER_ALG]); > + frer_act->rcvy_history_len = nla_get_u8(tb[TCA_FRER_RECOVER_HISTORY_LEN]); > + frer_act->rcvy_reset_msec = nla_get_u64(tb[TCA_FRER_RECOVER_RESET_TM]); > + > + frer_act->gen_seq_num = 0; > + frer_act->seq_space = 1 << FRER_SEQ_SPACE; > + frer_act->rcvy_seq_num = 0; > + frer_act->seq_history = 0xFFFFFFFF; > + frer_act->rcvy_take_noseq = true; > + > + switch (frer_act->tag_type) { > + case TCA_FRER_TAG_RTAG: > + frer_act->proto_ops = &rtag_ops; > + break; > + case TCA_FRER_TAG_HSR: > + case TCA_FRER_TAG_PRP: > + default: > + spin_unlock_bh(&frer_act->tcf_lock); > + return -EOPNOTSUPP; > + } > + > + if (frer_act->recover && frer_act->rcvy_reset_msec) { > + hrtimer_init(&frer_act->hrtimer, CLOCK_TAI, > + HRTIMER_MODE_REL_SOFT); > + frer_act->hrtimer.function = frer_hrtimer_func; > + > + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000); > + hrtimer_start(&frer_act->hrtimer, remaining_tm, > + HRTIMER_MODE_REL_SOFT); > + } > + > + spin_unlock_bh(&frer_act->tcf_lock); > + > + if (goto_ch) > + tcf_chain_put_by_act(goto_ch); > + > + return ret; > + > +release_idr: > + tcf_idr_release(*a, bind); > + return err; > +} > + > +static void frer_seq_recovery_reset(struct tcf_frer *frer_act) > +{ > + spin_lock(&frer_act->tcf_lock); > + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG) { > + frer_act->rcvy_seq_num = frer_act->seq_space - 1; > + frer_act->seq_history = 0; > + } > + frer_act->cps_seq_rcvy_resets++; > + frer_act->take_any = true; > + spin_unlock(&frer_act->tcf_lock); > +} > + > +static void frer_shift_seq_history(int value, struct tcf_frer *frer_act) > +{ > + int history_len = frer_act->rcvy_history_len; > + > + if ((frer_act->seq_history & BIT(history_len - 1)) == 0) > + frer_act->cps_seq_rcvy_lost_pkts++; > + > + frer_act->seq_history <<= 1; > + > + if (value) > + frer_act->seq_history |= BIT(0); > +} > + > +static int frer_vector_rcvy_alg(struct tcf_frer *frer_act, int sequence, > + bool individual) > +{ > + struct hrtimer *timer = &frer_act->hrtimer; > + bool reset_timer = false; > + ktime_t remaining_tm; > + int delta, ret; > + > + if (sequence == FRER_RCVY_INVALID_SEQ) { > + frer_act->cps_seq_rcvy_tagless_pkts++; > + if (frer_act->rcvy_take_noseq) { > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } else { > + return FRER_RCVY_DISCARDED; > + } > + } > + > + delta = (sequence - frer_act->rcvy_seq_num) & (frer_act->seq_space - 1); > + /* -(RecovSeqSpace/2) <= delta <= ((RecovSeqSpace/2)-1) */ > + if (delta & (frer_act->seq_space / 2)) > + delta -= frer_act->seq_space; > + > + if (frer_act->take_any) { > + frer_act->take_any = false; > + frer_act->seq_history |= BIT(0); > + frer_act->rcvy_seq_num = sequence; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } > + > + if (delta >= frer_act->rcvy_history_len || > + delta <= -frer_act->rcvy_history_len) { > + /* Packet is out-of-range. */ > + frer_act->cps_seq_rcvy_rogue_pkts++; > + > + if (individual) > + reset_timer = true; > + > + ret = FRER_RCVY_DISCARDED; > + goto out; > + } else if (delta <= 0) { > + /* Packet is old and in SequenceHistory. */ > + if (frer_act->seq_history & BIT(-delta)) { > + if (individual) > + reset_timer = true; > + > + /* Packet has been seen. */ > + ret = FRER_RCVY_DISCARDED; > + goto out; > + } else { > + /* Packet has not been seen. */ > + frer_act->seq_history |= BIT(-delta); > + frer_act->cps_seq_rcvy_out_of_order_pkts++; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } > + } else { > + /* Packet is not too far ahead of the one we want. */ > + if (delta != 1) > + frer_act->cps_seq_rcvy_out_of_order_pkts++; > + > + while (--delta) > + frer_shift_seq_history(0, frer_act); > + frer_shift_seq_history(1, frer_act); > + frer_act->rcvy_seq_num = sequence; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } > +out: > + if (reset_timer && frer_act->rcvy_reset_msec) { > + remaining_tm = > + (ktime_t)(frer_act->rcvy_reset_msec * 1000000); > + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT); > + } > + > + return ret; > +} > + > +static int frer_match_rcvy_alg(struct tcf_frer *frer_act, int sequence, > + bool individual) > +{ > + struct hrtimer *timer = &frer_act->hrtimer; > + bool reset_timer = false; > + ktime_t remaining_tm; > + int delta, ret; > + > + if (sequence == FRER_RCVY_INVALID_SEQ) { > + frer_act->cps_seq_rcvy_tagless_pkts++; > + > + return FRER_RCVY_PASSED; > + } > + > + if (frer_act->take_any) { > + frer_act->take_any = false; > + frer_act->rcvy_seq_num = sequence; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } > + > + delta = sequence - frer_act->rcvy_seq_num; > + if (delta) { > + /* Packet has not been seen, accept it. */ > + if (delta != 1) > + frer_act->cps_seq_rcvy_out_of_order_pkts++; > + > + frer_act->rcvy_seq_num = sequence; > + > + reset_timer = true; > + ret = FRER_RCVY_PASSED; > + goto out; > + } else { > + if (individual) > + reset_timer = true; > + > + /* Packet has been seen. Do not forward. */ > + ret = FRER_RCVY_DISCARDED; > + goto out; > + } > + > +out: > + if (reset_timer && frer_act->rcvy_reset_msec) { > + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000); > + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT); > + } > + > + return ret; > +} > + > +static int tcf_frer_act(struct sk_buff *skb, const struct tc_action *a, > + struct tcf_result *res) > +{ > + struct tcf_frer *frer_act = to_frer(a); > + bool ingress, individual; > + int ret, retval; > + int sequence; > + > + tcf_lastuse_update(&frer_act->tcf_tm); > + tcf_action_update_bstats(&frer_act->common, skb); > + > + retval = READ_ONCE(frer_act->tcf_action); > + > + sequence = frer_act->proto_ops->decode(skb); > + > + ingress = skb_at_tc_ingress(skb); > + individual = ingress; > + > + if (frer_act->recover) { > + spin_lock(&frer_act->tcf_lock); > + > + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG) > + ret = frer_vector_rcvy_alg(frer_act, sequence, > + individual); > + else > + ret = frer_match_rcvy_alg(frer_act, sequence, > + individual); > + if (ret) { > + frer_act->tcf_qstats.drops++; > + retval = TC_ACT_SHOT; > + } > + > + if (frer_act->tag_action == TCA_FRER_TAG_POP) > + frer_act->proto_ops->tag_pop(skb, frer_act); > + > + spin_unlock(&frer_act->tcf_lock); > + > + return retval; > + } > + > + if (frer_act->tag_action == TCA_FRER_TAG_PUSH && > + sequence == FRER_RCVY_INVALID_SEQ) { > + spin_lock(&frer_act->tcf_lock); > + > + frer_seq_generation_alg(frer_act); > + > + frer_act->proto_ops->encode(skb, frer_act); > + > + spin_unlock(&frer_act->tcf_lock); > + } > + > + return retval; > +} > + > +static int tcf_frer_dump(struct sk_buff *skb, struct tc_action *a, > + int bind, int ref) > +{ > + unsigned char *b = skb_tail_pointer(skb); > + struct tcf_frer *frer_act = to_frer(a); > + struct tc_frer opt = { > + .index = frer_act->tcf_index, > + .refcnt = refcount_read(&frer_act->tcf_refcnt) - ref, > + .bindcnt = atomic_read(&frer_act->tcf_bindcnt) - bind, > + }; > + struct tcf_t t; > + > + spin_lock_bh(&frer_act->tcf_lock); > + opt.action = frer_act->tcf_action; > + > + if (nla_put(skb, TCA_FRER_PARMS, sizeof(opt), &opt)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_TAG_TYPE, frer_act->tag_type)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_TAG_ACTION, frer_act->tag_action)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_RECOVER, frer_act->recover)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_RECOVER_ALG, frer_act->rcvy_alg)) > + goto nla_put_failure; > + > + if (nla_put_u8(skb, TCA_FRER_RECOVER_HISTORY_LEN, > + frer_act->rcvy_history_len)) > + goto nla_put_failure; > + > + if (nla_put_u64_64bit(skb, TCA_FRER_RECOVER_RESET_TM, > + frer_act->rcvy_reset_msec, TCA_FRER_PAD)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_TAGLESS_PKTS, > + frer_act->cps_seq_rcvy_tagless_pkts)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS, > + frer_act->cps_seq_rcvy_out_of_order_pkts)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_ROGUE_PKTS, > + frer_act->cps_seq_rcvy_rogue_pkts)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_LOST_PKTS, > + frer_act->cps_seq_rcvy_lost_pkts)) > + goto nla_put_failure; > + > + if (nla_put_u32(skb, TCA_FRER_RECOVER_RESETS, > + frer_act->cps_seq_rcvy_resets)) > + goto nla_put_failure; > + > + tcf_tm_dump(&t, &frer_act->tcf_tm); > + if (nla_put_64bit(skb, TCA_FRER_TM, sizeof(t), > + &t, TCA_FRER_PAD)) > + goto nla_put_failure; > + spin_unlock_bh(&frer_act->tcf_lock); > + > + return skb->len; > + > +nla_put_failure: > + spin_unlock_bh(&frer_act->tcf_lock); > + nlmsg_trim(skb, b); > + > + return -1; > +} > + > +static int tcf_frer_walker(struct net *net, struct sk_buff *skb, > + struct netlink_callback *cb, int type, > + const struct tc_action_ops *ops, > + struct netlink_ext_ack *extack) > +{ > + struct tc_action_net *tn = net_generic(net, frer_net_id); > + > + return tcf_generic_walker(tn, skb, cb, type, ops, extack); > +} > + > +static int tcf_frer_search(struct net *net, struct tc_action **a, u32 index) > +{ > + struct tc_action_net *tn = net_generic(net, frer_net_id); > + > + return tcf_idr_search(tn, a, index); > +} > + > +static void tcf_frer_stats_update(struct tc_action *a, u64 bytes, u64 packets, > + u64 drops, u64 lastuse, bool hw) > +{ > + struct tcf_frer *frer_act = to_frer(a); > + struct tcf_t *tm = &frer_act->tcf_tm; > + > + tcf_action_update_stats(a, bytes, packets, drops, hw); > + tm->lastuse = max_t(u64, tm->lastuse, lastuse); > +} > + > +static void tcf_frer_cleanup(struct tc_action *a) > +{ > + struct tcf_frer *frer_act = to_frer(a); > + > + if (frer_act->rcvy_reset_msec) > + hrtimer_cancel(&frer_act->hrtimer); I could be missing something, but it seems that you initialized the hrtimer if ->recover and ->rcvy_reset_msec were different from zero. I think this can cause a non-initialized hrtimer to be cancelled, if the user set ->recover to zero and ->rcvy_reset_msec to not zero. Perhaps adding some policy checks for valid values of TCA_FRER_RECOVER and friends would help? Documenting what the different configuration parameters mean would be nice as well. > +} > + > +static size_t tcf_frer_get_fill_size(const struct tc_action *act) > +{ > + return nla_total_size(sizeof(struct tc_frer)); > +} > + > +static struct tc_action_ops act_frer_ops = { > + .kind = "frer", > + .id = TCA_ID_FRER, > + .owner = THIS_MODULE, > + .act = tcf_frer_act, > + .init = tcf_frer_init, > + .cleanup = tcf_frer_cleanup, > + .dump = tcf_frer_dump, > + .walk = tcf_frer_walker, > + .stats_update = tcf_frer_stats_update, > + .get_fill_size = tcf_frer_get_fill_size, > + .lookup = tcf_frer_search, > + .size = sizeof(struct tcf_frer), > +}; > + > +static __net_init int frer_init_net(struct net *net) > +{ > + struct tc_action_net *tn = net_generic(net, frer_net_id); > + > + return tc_action_net_init(net, tn, &act_frer_ops); > +} > + > +static void __net_exit frer_exit_net(struct list_head *net_list) > +{ > + tc_action_net_exit(net_list, frer_net_id); > +}; > + > +static struct pernet_operations frer_net_ops = { > + .init = frer_init_net, > + .exit_batch = frer_exit_net, > + .id = &frer_net_id, > + .size = sizeof(struct tc_action_net), > +}; > + > +static int __init frer_init_module(void) > +{ > + return tcf_register_action(&act_frer_ops, &frer_net_ops); > +} > + > +static void __exit frer_cleanup_module(void) > +{ > + tcf_unregister_action(&act_frer_ops, &frer_net_ops); > +} > + > +module_init(frer_init_module); > +module_exit(frer_cleanup_module); > +MODULE_LICENSE("GPL v2"); > diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c > index 2ef8f5a6205a..353184987427 100644 > --- a/net/sched/cls_api.c > +++ b/net/sched/cls_api.c > @@ -39,6 +39,7 @@ > #include <net/tc_act/tc_ct.h> > #include <net/tc_act/tc_mpls.h> > #include <net/tc_act/tc_gate.h> > +#include <net/tc_act/tc_frer.h> > #include <net/flow_offload.h> > > extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; > @@ -3706,6 +3707,16 @@ int tc_setup_flow_action(struct flow_action *flow_action, > err = tcf_gate_get_entries(entry, act); > if (err) > goto err_out_locked; > + } else if (is_tcf_frer(act)) { > + entry->id = FLOW_ACTION_FRER; > + entry->frer.tag_type = to_frer(act)->tag_type; > + entry->frer.tag_action = to_frer(act)->tag_action; > + entry->frer.recover = to_frer(act)->recover; > + entry->frer.rcvy_alg = to_frer(act)->rcvy_alg; > + entry->frer.rcvy_history_len = > + to_frer(act)->rcvy_history_len; > + entry->frer.rcvy_reset_msec = > + to_frer(act)->rcvy_reset_msec; > } else { > err = -EOPNOTSUPP; > goto err_out_locked; > -- > 2.17.1 >
On 2021. 09. 28. 13:44, Xiaoliang Yang wrote: > This patch introduce a frer action to implement frame replication and > elimination for reliability, which is defined in IEEE P802.1CB. HiXiaoliang! thanks for your efforts to introduce afreraction to implement frame replication and elimination for reliability, which is defined in IEEE P802.1CB-2017. I would like to relay a small comment from our team, regarding to the FRER, not particularly to the code. Support of RTAG format is very straightforward. Since 2017, several maintenance items were opened regarding IEEE P802.1CB-2017 to fix some errors in the standard. Discussions results will be published soon e.g., in IEEE P802.1CBdb (https://1.ieee802.org/tsn/802-1cbdb/). One of the maintenance items impacts the vector recovery algorithm itself. Details on the problem and the solution are here: -https://www.802-1.org/items/370 -https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf <https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf> It is a small but important fix. There is an incorrect reference to the size of the recovery window, when a received packet is checked to be out-of-range or not. Without this fix the vector recovery algorithm do not work properly in some scenarios. Please consider to update your patch to reflect the maintenance efforts of IEEE to correct .1CB-2017 related issues. > There are two modes for frer action: generate and push the tag, recover > and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This > patch only supports RTAG now. > > User can push the tag on egress port of the talker device, recover and > pop the tag on ingress port of the listener device. When it's a relay > system, push the tag on ingress port, or set individual recover on > ingress port. Set the sequence recover on egress port. > > Use action "mirred" to do split function, and use "vlan-modify" to do > active stream identification function on relay system. > All of our research in the topic based on a in-house userspace FRER implementation but we are looking forward to test your work in the future. Thanks, Ferenc
Hi Ferenc, (I adjusted the CC list) On Fri, May 06, 2022 at 11:55:56AM +0000, Ferenc Fejes wrote: > On 2021. 09. 28. 13:44, Xiaoliang Yang wrote: > > This patch introduce a frer action to implement frame replication and > > elimination for reliability, which is defined in IEEE P802.1CB. > > HiXiaoliang! > > thanks for your efforts to introduce afreraction to implement frame > replication and elimination for reliability, which is defined in IEEE > P802.1CB-2017. I would like to relay a small comment from our team, > regarding to the FRER, not particularly to the code. > > Support of RTAG format is very straightforward. > > Since 2017, several maintenance items were opened regarding IEEE > P802.1CB-2017 to fix some errors in the standard. Discussions results > will be published soon e.g., in IEEE P802.1CBdb > (https://1.ieee802.org/tsn/802-1cbdb/). > > One of the maintenance items impacts the vector recovery algorithm itself. > > Details on the problem and the solution are here: > > -https://www.802-1.org/items/370 > > -https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf > <https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf> > > It is a small but important fix. There is an incorrect reference to the > size of the recovery window, when a received packet is checked to be > out-of-range or not. Without this fix the vector recovery algorithm do > not work properly in some scenarios. > > Please consider to update your patch to reflect the maintenance efforts > of IEEE to correct .1CB-2017 related issues. > > > There are two modes for frer action: generate and push the tag, recover > > and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This > > patch only supports RTAG now. > > > > User can push the tag on egress port of the talker device, recover and > > pop the tag on ingress port of the listener device. When it's a relay > > system, push the tag on ingress port, or set individual recover on > > ingress port. Set the sequence recover on egress port. > > > > Use action "mirred" to do split function, and use "vlan-modify" to do > > active stream identification function on relay system. > > > All of our research in the topic based on a in-house userspace FRER > implementation but we are looking forward to test your work in the future. > > Thanks, > > Ferenc Glad to see someone familiar with 802.1CB. I have a few questions and concerns if you don't mind. I think we are seeing a bit of a stall on the topic of FRER modeling in the Linux networking stack, in no small part due to the fact that we are working with pre-standard hardware. The limitation with Xiaoliang's proposal here (to model FRER stream replication and recovery as a tc action) is that I don't think it works well for traffic termination - it only covers properly the use case of a switch. More precisely, there isn't a single convergent termination point for either locally originating traffic, or locally received traffic (i.e. you, as user, don't know on which interface of several available to open a socket). In our hardware, this limitation isn't really visible because of the way in which the Ethernet switch is connected inside the NXP LS1028A. It is something like this: +---------------------------------------+ | | | +------+ +------+ | | | eno2 | | eno3 | | | +------+ +------+ | | | | | | +------+ +------+ | | | swp4 | | swp5 | | | +------+ +------+ | | +------+ +------+ +------+ +------+ | | | swp0 | | swp1 | | swp2 | | swp3 | | +--+------+-+------+-+------+-+------+--+ In the above picture, the switch ports swp0-swp3 have eno3 as a DSA master (connected to the internal swp5, a CPU port). The other internal port, swp5, is configured as a DSA user port, so it has a net device. Analogously, while eno3 is a DSA master and receives DSA-tagged traffic (so it is useless for direct IP termination), eno2 receives DSA untagged traffic and is therefore an IP termination endpoint into a switched network. What we do in this case is put tc-frer rules for stream replication and recovery on swp4 itself, and we use eno2 as the convergence point for locally terminated streams. However, naturally, a hardware design that does not look like this can't terminate traffic like this. My idea was that it might be better if FRER was its own virtual network interface (like a bridge), with multiple slave interfaces. The FRER net device could keep its own database of streams and actions (completely outside of tc) which would be managed similar to "bridge fdb add ...". This way, the frer0 netdevice would be the local termination endpoint, logically speaking. What I don't know for sure is if a FRER netdevice is supposed to forward frames which aren't in its list of streams (and if so, by which rules). Because if a FRER netdevice is supposed to behave like a regular bridge for non-streams, the implication is that the FRER logic should then be integrated into the Linux bridge. Also, this new FRER software model complicates the offloading on NXP LS1028A, but let's leave that aside, since it shouldn't really be the decisive factor on what should the software model look like. Do you have any comments on this topic?
Hi Vladimir! I adjusted the CC list too with my colleagues. On 2022. 05. 06. 14:23, Vladimir Oltean wrote: > Hi Ferenc, > > (I adjusted the CC list) > > On Fri, May 06, 2022 at 11:55:56AM +0000, Ferenc Fejes wrote: >> On 2021. 09. 28. 13:44, Xiaoliang Yang wrote: >>> This patch introduce a frer action to implement frame replication and >>> elimination for reliability, which is defined in IEEE P802.1CB. >> HiXiaoliang! >> >> thanks for your efforts to introduce afreraction to implement frame >> replication and elimination for reliability, which is defined in IEEE >> P802.1CB-2017. I would like to relay a small comment from our team, >> regarding to the FRER, not particularly to the code. >> >> Support of RTAG format is very straightforward. >> >> Since 2017, several maintenance items were opened regarding IEEE >> P802.1CB-2017 to fix some errors in the standard. Discussions results >> will be published soon e.g., in IEEE P802.1CBdb >> (https://protect2.fireeye.com/v1/url?k=31323334-501d5122-313273af-454445555731-a50148cf7cc14d37&q=1&e=bac1cc80-3c80-4916-be67-352e21564815&u=https%3A%2F%2F1.ieee802.org%2Ftsn%2F802-1cbdb%2F). >> >> One of the maintenance items impacts the vector recovery algorithm itself. >> >> Details on the problem and the solution are here: >> >> -https://protect2.fireeye.com/v1/url?k=31323334-501d5122-313273af-454445555731-7de6c1f45efd0a2c&q=1&e=bac1cc80-3c80-4916-be67-352e21564815&u=https%3A%2F%2Fwww.802-1.org%2Fitems%2F370 >> >> -https://protect2.fireeye.com/v1/url?k=31323334-501d5122-313273af-454445555731-cc75f6c9f6a68939&q=1&e=bac1cc80-3c80-4916-be67-352e21564815&u=https%3A%2F%2Fwww.ieee802.org%2F1%2Ffiles%2Fpublic%2Fdocs2020%2Fmaint-varga-257-FRER-recovery-window-0320-v01.pdf >> <https://protect2.fireeye.com/v1/url?k=31323334-501d5122-313273af-454445555731-cc75f6c9f6a68939&q=1&e=bac1cc80-3c80-4916-be67-352e21564815&u=https%3A%2F%2Fwww.ieee802.org%2F1%2Ffiles%2Fpublic%2Fdocs2020%2Fmaint-varga-257-FRER-recovery-window-0320-v01.pdf> >> >> It is a small but important fix. There is an incorrect reference to the >> size of the recovery window, when a received packet is checked to be >> out-of-range or not. Without this fix the vector recovery algorithm do >> not work properly in some scenarios. >> >> Please consider to update your patch to reflect the maintenance efforts >> of IEEE to correct .1CB-2017 related issues. >> >>> There are two modes for frer action: generate and push the tag, recover >>> and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This >>> patch only supports RTAG now. >>> >>> User can push the tag on egress port of the talker device, recover and >>> pop the tag on ingress port of the listener device. When it's a relay >>> system, push the tag on ingress port, or set individual recover on >>> ingress port. Set the sequence recover on egress port. >>> >>> Use action "mirred" to do split function, and use "vlan-modify" to do >>> active stream identification function on relay system. >>> >> All of our research in the topic based on a in-house userspace FRER >> implementation but we are looking forward to test your work in the future. >> >> Thanks, >> >> Ferenc > Glad to see someone familiar with 802.1CB. I have a few questions and > concerns if you don't mind. I CCd Balazs Varga and Janos Farkas, experts of the TSN topics including 802.1CB as well. Istvan Moldovan's can also give valuable feedback as the author of our in-house userspace FRER. I'll also try my best to answer but I'm the least competent in the topic. > > I think we are seeing a bit of a stall on the topic of FRER modeling in > the Linux networking stack, in no small part due to the fact that we are > working with pre-standard hardware. > > The limitation with Xiaoliang's proposal here (to model FRER stream > replication and recovery as a tc action) is that I don't think it works > well for traffic termination - it only covers properly the use case of a > switch. More precisely, there isn't a single convergent termination > point for either locally originating traffic, or locally received > traffic (i.e. you, as user, don't know on which interface of several > available to open a socket). > > In our hardware, this limitation isn't really visible because of the way > in which the Ethernet switch is connected inside the NXP LS1028A. We have some NXP LS1028As as well so at least I familiar with the box :-) > It is something like this: > > +---------------------------------------+ > | | > | +------+ +------+ | > | | eno2 | | eno3 | | > | +------+ +------+ | > | | | | > | +------+ +------+ | > | | swp4 | | swp5 | | > | +------+ +------+ | > | +------+ +------+ +------+ +------+ | > | | swp0 | | swp1 | | swp2 | | swp3 | | > +--+------+-+------+-+------+-+------+--+ > > In the above picture, the switch ports swp0-swp3 have eno3 as a DSA > master (connected to the internal swp5, a CPU port). The other internal > port, swp5, is configured as a DSA user port, so it has a net device. > Analogously, while eno3 is a DSA master and receives DSA-tagged traffic > (so it is useless for direct IP termination), eno2 receives DSA untagged > traffic and is therefore an IP termination endpoint into a switched > network. Unfortunately I'm not familiar with the distributed switch architecture (I only read a netdev paper from that and thats all) but I try to grasp on the problem. In my understanding, the main issue is the distinction between the locally terminated and forwarded TSN streams, because currently the DSA metadata tags are required to do that? Can you explain the problem for one who not familiar with DSA? > > What we do in this case is put tc-frer rules for stream replication and > recovery on swp4 itself, and we use eno2 as the convergence point for > locally terminated streams. > > However, naturally, a hardware design that does not look like this can't > terminate traffic like this. Yes, this is my concern too. What would be a nice to have thing if the user can configure the SW implementation and the HW offload with the same commands and the original tc-frer approach fits well to this concept. Anything towards that direction is the way forward IMO, even if the underlying implementation will change. > > My idea was that it might be better if FRER was its own virtual network > interface (like a bridge), with multiple slave interfaces. The FRER net > device could keep its own database of streams and actions (completely > outside of tc) which would be managed similar to "bridge fdb add ...". > This way, the frer0 netdevice would be the local termination endpoint, > logically speaking. Interesting approach. To be honest I dont see the long term implications of this solution, others might have ideas about the pros and cons, but that looks like a solution where local stream termination is trivial. > What I don't know for sure is if a FRER netdevice is supposed to forward > frames which aren't in its list of streams (and if so, by which rules). Yes this sounds correct, somehow non-local packets should be forwarded too with a bridge. Is it possible to the linux bridge recognize if one port is a frer0 port (or on the frer0 if that is enslaved) and do the forwarding of the streams? Re-implementing bridge functions just for the frer device would be redundant. Unfortunately I never dug myself deep enough into the linux bridge code, just when debugged VXLAN ARP suppression for EVPN, but I think it would be possible to exchange some metadatas between the bridge and the frer device to do the forwarding/terminating decision, something like here [0] > Because if a FRER netdevice is supposed to behave like a regular bridge > for non-streams, the implication is that the FRER logic should then be > integrated into the Linux bridge. This is (for me) more appealing. Also we can keep that in mind when Linux will support deterministic layer3 networking (IETF DetNet WG RFCs) it would be nice to have mapping between TSN and DetNet streams, then forward the packets on DetNet tunnels as well (with different endpoints). This is something our team researching so Balazs and Istvan might give you some info about that. But I admit that thinking about playing nicely with DetNet in regard of the current linux FRER implementation is more than overwhelming, but the Linux bridge would be a nice place to map TSN flows to DetNet flow like currently EVPN maps VLANs to VXLANs. > Also, this new FRER software model complicates the offloading on NXP > LS1028A, but let's leave that aside, since it shouldn't really be the > decisive factor on what should the software model look like. > > Do you have any comments on this topic? I would like to see if others can join to the discussion as well, I will try to think about this problem more too. [0] https://lore.kernel.org/netdev/20220301050439.31785-10-roopa@nvidia.com/ Best, Ferenc
On Fri, May 06, 2022 at 02:44:17PM +0000, Ferenc Fejes wrote: > > Glad to see someone familiar with 802.1CB. I have a few questions and > > concerns if you don't mind. > > I CCd Balazs Varga and Janos Farkas, experts of the TSN topics > including 802.1CB as well. Istvan Moldovan's can also give valuable > feedback as the author of our in-house userspace FRER. I'll also try my > best to answer but I'm the least competent in the topic. > Nope, that would probably be me ;) I am commenting on Xiaoliang's patch without having even run it, and I have only looked through the code diagonally, and I'm not exactly an expert on the use cases that drove the standard either. So plenty of chances to make mistakes. But nonetheless I hope that by explaining to me where I'm wrong we'll be able to make progress with this. > > > > I think we are seeing a bit of a stall on the topic of FRER modeling in > > the Linux networking stack, in no small part due to the fact that we are > > working with pre-standard hardware. > > > > The limitation with Xiaoliang's proposal here (to model FRER stream > > replication and recovery as a tc action) is that I don't think it works > > well for traffic termination - it only covers properly the use case of a > > switch. More precisely, there isn't a single convergent termination > > point for either locally originating traffic, or locally received > > traffic (i.e. you, as user, don't know on which interface of several > > available to open a socket). > > > > In our hardware, this limitation isn't really visible because of the way > > in which the Ethernet switch is connected inside the NXP LS1028A. > > We have some NXP LS1028As as well so at least I familiar with the box :-) Cool, this means we'll eventually reach a common understanding of the topic. > > It is something like this: > > > > +---------------------------------------+ > > | | > > | +------+ +------+ | > > | | eno2 | | eno3 | | > > | +------+ +------+ | > > | | | | > > | +------+ +------+ | > > | | swp4 | | swp5 | | > > | +------+ +------+ | > > | +------+ +------+ +------+ +------+ | > > | | swp0 | | swp1 | | swp2 | | swp3 | | > > +--+------+-+------+-+------+-+------+--+ > > > > In the above picture, the switch ports swp0-swp3 have eno3 as a DSA > > master (connected to the internal swp5, a CPU port). The other internal > > port, swp5, is configured as a DSA user port, so it has a net device. > > Analogously, while eno3 is a DSA master and receives DSA-tagged traffic > > (so it is useless for direct IP termination), eno2 receives DSA untagged > > traffic and is therefore an IP termination endpoint into a switched > > network. > > Unfortunately I'm not familiar with the distributed switch architecture > (I only read a netdev paper from that and thats all) but I try to grasp > on the problem. > In my understanding, the main issue is the distinction between the > locally terminated and forwarded TSN streams, because currently the DSA > metadata tags are required to do that? Can you explain the problem for > one who not familiar with DSA? Forget about DSA, what I'm trying to get at is that you might one day read the release notes of the Linux kernel and see that it gained support for FRER using tc, and get all excited, download and compile it, set up 2 machines connected through 2 port pairs, and try to configure the systems to ping each other redundantly, to become familiar with how it works. Start with something simple, what can be so hard about a ping ;) You'll say something along the lines of 1. ok, I have 2 IP addresses, so I need 2 streams, one A -> B and one B -> A 2. I want to use the null stream identification function (MAC DA, VLAN ID for those following along) so I have to resolve each IP address to a MAC address to use as a stream identifier, but how? since the 2 Ethernet cards on each system have different MAC addresses. Anyway, pick one and put the other card in promisc for now. 3. I have the MACs now, I want to configure the streams. The stream "A -> B" needs to be configured for splitting on the first system, and for sequence recovery on the second system. The stream "B -> A" needs to be configured for recovery on the first system and for splitting on the second. 4. Let's start with splitting, this is just the "mirred egress mirror" action, nothing FRER specific about it. There's also the "frer rtag tag-action tag-push" action which adds the redundancy tag. Good thing these actions can be chained. So let's put a filter on the egress qdisc of eth0, that matches on the MAC address of B, and has a mirred mirror action to eth1, and a "rtag tag-push" action. Notice how by this time, eth0 becomes sort of a "primary" interface and eth1 sort of a "secondary" interface. So if you ping, you need to use eth0. What if the link goes down on eth0 you ask, how does the "redundancy" in "frer" come into play, with the traffic still going through eth1? No time to ask questions like that, let's move on. 5. Let's say that both links are up, and system B is receiving a replicated stream with FRER tags on both eth0 and eth1. It wants to eliminate the duplicates and see a continuous flow of ICMP requests without the extra FRER tag. Back to the documentation. We see 2 kinds of stream recovery, one is "individual" recovery which is a "frer rtag recover" action put on the ingress qdisc of an interface, and the other is just "recovery", which is the same action but put on the egress qdisc. We don't want individual sequence recovery processes on eth0 and eth1 of station B, since those won't consider the packets as being members of the same stream, and the'll still be duplicated. So we want the normal recovery. But on whose netdev's egress qdisc do we put the "rtag recover" action? Both eth0 and eth1 are receiving. There is no central convergence point. Now you're stumped and thinking, how is this supposed to be used? What can you do with it? I mean, I can probably create a veth pair as that aforementioned missing convergence point, and guide packets from {eth0, eth1} towards the lefthand side of the veth pair, using mirred redirect. Then I can put the frer rules on the egress qdisc of the lefthand side of the veth pair, and recover the plaintext traffic (no duplicates, no RTAG) on the righthand side of the veth pair. But... seriously? And there is not even one mention of this in the documentation? And even so. You need to send the request through eno0 and expect to receive the reply through a veth interface? How is any user space application ever going to work? Now comes the connection with DSA. Xiaoliang made tc-frer with LS1028A offloading in mind. No criticism there, after all it is the hardware we are working with. The intended usage pattern is to put the FRER rules on the switch port netdevices, and to do the termination on the switch-unaware netdevices. In other words, it's as if eno2 is connected to a completely external RedBox, and tc-frer only serves externally received traffic. Except that those 2 isolated parts of the system are physically embedded in one. So at step (1) you put the IP on eno2, at step (2) you choose the MAC address for the stream to be that of eno2, at step (4) you configure the split action (mirred towards the external ports, plus FRER tag push) on the _ingress_ of swp4 (traffic sent by eno2 is received by swp4). At step (5) you put the sequence recovery on the _egress_ of swp4 (traffic that egresses swp4 ingresses eno2). So then you might ask, what would we do if we didn't have that eno2 <-> swp4 port pair? Is tc-frer useful for someone who doesn't, but is maybe even able to offload 802.1CB streams, including termination, through some other paradigm? The thing is that, as far as I can tell, Linux does not really like to set up a network for the exclusive use of others (pure forwarding), to which it has no local access. This is essentially the design of tc-frer, and my issue with it. > > > > What we do in this case is put tc-frer rules for stream replication and > > recovery on swp4 itself, and we use eno2 as the convergence point for > > locally terminated streams. > > > > However, naturally, a hardware design that does not look like this can't > > terminate traffic like this. > > Yes, this is my concern too. What would be a nice to have thing if the > user can configure the SW implementation and the HW offload with the > same commands and the original tc-frer approach fits well to this > concept. Anything towards that direction is the way forward IMO, even if > the underlying implementation will change. > > > > My idea was that it might be better if FRER was its own virtual network > > interface (like a bridge), with multiple slave interfaces. The FRER net > > device could keep its own database of streams and actions (completely > > outside of tc) which would be managed similar to "bridge fdb add ...". > > This way, the frer0 netdevice would be the local termination endpoint, > > logically speaking. > > Interesting approach. To be honest I dont see the long term implications > of this solution, others might have ideas about the pros and cons, but > that looks like a solution where local stream termination is trivial. The implication is that you can easily do stuff with FRER. Maybe I'm relying too much on ping as an example, but I am really lacking real life use cases. Feedback here would be extremely appreciated. > > What I don't know for sure is if a FRER netdevice is supposed to forward > > frames which aren't in its list of streams (and if so, by which rules). > > Yes this sounds correct, somehow non-local packets should be forwarded > too with a bridge. Is it possible to the linux bridge recognize if one > port is a frer0 port (or on the frer0 if that is enslaved) and do the > forwarding of the streams? Re-implementing bridge functions just for the > frer device would be redundant. Unfortunately I never dug myself deep > enough into the linux bridge code, just when debugged VXLAN ARP > suppression for EVPN, but I think it would be possible to exchange some > metadatas between the bridge and the frer device to do the > forwarding/terminating decision, something like here [0] The other question if you're in favor of "FRER as net device" is whether we should have a FRER interface per TSN stream (or per stream pair, RX and TX, since streams are unidirectional), or a FRER interface for all TSN streams. If the latter, we're moving more towards "FRER integrated in bridge" territory. Or... maybe even resolve local termination through some other mechanism, and still build on top of a tc-frer action. The thing with "FRER as net device" on the other hand is that we've already started modeling PSFP through tc. So if the FRER device has its own rules, then "these" streams are not the same as "those" streams, and a user would have to duplicate parts of the configuration. Whereas I think the PSFP standard refers to stream identifiers directly from 802.1CB. > > Because if a FRER netdevice is supposed to behave like a regular bridge > > for non-streams, the implication is that the FRER logic should then be > > integrated into the Linux bridge. > > This is (for me) more appealing. Also we can keep that in mind when > Linux will support deterministic layer3 networking (IETF DetNet WG RFCs) > it would be nice to have mapping between TSN and DetNet streams, then > forward the packets on DetNet tunnels as well (with different > endpoints). This is something our team researching so Balazs and Istvan > might give you some info about that. But I admit that thinking about > playing nicely with DetNet in regard of the current linux FRER > implementation is more than overwhelming, but the Linux bridge would be > a nice place to map TSN flows to DetNet flow like currently EVPN maps > VLANs to VXLANs. So what would be the use case for bridging packets belonging to unrecognized TSN streams? In my toy setups I almost ran out of ideas how to drop unwanted traffic and prevent it from being looped forever. STP, MSTP, MRP are all out the window, this is active redundancy, you need to embrace the loops, so it isn't as if you can pretend that something sane is going to happen with a packet if it isn't part of a stream that gets special handling from 802.1CB. No broadcast, no multicast, and self address filtering on all switch ports. > > Also, this new FRER software model complicates the offloading on NXP > > LS1028A, but let's leave that aside, since it shouldn't really be the > > decisive factor on what should the software model look like. > > > > Do you have any comments on this topic? > I would like to see if others can join to the discussion as well, I will > try to think about this problem more too. > > [0] https://lore.kernel.org/netdev/20220301050439.31785-10-roopa@nvidia.com/ > > Best, > Ferenc
Hi, It is an interesting conversation, see my comments below. > On Fri, May 06, 2022 at 02:44:17PM +0000, Ferenc Fejes wrote: >> > Glad to see someone familiar with 802.1CB. I have a few questions and >> > concerns if you don't mind. >> >> I CCd Balazs Varga and Janos Farkas, experts of the TSN topics >> including 802.1CB as well. Istvan Moldovan's can also give valuable >> feedback as the author of our in-house userspace FRER. I'll also try my >> best to answer but I'm the least competent in the topic. >> > Nope, that would probably be me ;) > I am commenting on Xiaoliang's patch without having even run it, and I > have only looked through the code diagonally, and I'm not exactly an > expert on the use cases that drove the standard either. So plenty of > chances to make mistakes. But nonetheless I hope that by explaining to > me where I'm wrong we'll be able to make progress with this. >> > >> > I think we are seeing a bit of a stall on the topic of FRER modeling in >> > the Linux networking stack, in no small part due to the fact that we are >> > working with pre-standard hardware. >> > >> > The limitation with Xiaoliang's proposal here (to model FRER stream >> > replication and recovery as a tc action) is that I don't think it works >> > well for traffic termination - it only covers properly the use case of a >> > switch. More precisely, there isn't a single convergent termination >> > point for either locally originating traffic, or locally received >> > traffic (i.e. you, as user, don't know on which interface of several >> > available to open a socket). >> > >> > In our hardware, this limitation isn't really visible because of the way >> > in which the Ethernet switch is connected inside the NXP LS1028A. >> >> We have some NXP LS1028As as well so at least I familiar with the box :-) > Cool, this means we'll eventually reach a common understanding of the > topic. >> > It is something like this: >> > >> > +---------------------------------------+ >> > | | >> > | +------+ +------+ | >> > | | eno2 | | eno3 | | >> > | +------+ +------+ | >> > | | | | >> > | +------+ +------+ | >> > | | swp4 | | swp5 | | >> > | +------+ +------+ | >> > | +------+ +------+ +------+ +------+ | >> > | | swp0 | | swp1 | | swp2 | | swp3 | | >> > +--+------+-+------+-+------+-+------+--+ >> > >> > In the above picture, the switch ports swp0-swp3 have eno3 as a DSA >> > master (connected to the internal swp5, a CPU port). The other internal >> > port, swp5, is configured as a DSA user port, so it has a net device. >> > Analogously, while eno3 is a DSA master and receives DSA-tagged traffic >> > (so it is useless for direct IP termination), eno2 receives DSA untagged >> > traffic and is therefore an IP termination endpoint into a switched >> > network. >> >> Unfortunately I'm not familiar with the distributed switch architecture >> (I only read a netdev paper from that and thats all) but I try to grasp >> on the problem. >> In my understanding, the main issue is the distinction between the >> locally terminated and forwarded TSN streams, because currently the DSA >> metadata tags are required to do that? Can you explain the problem for >> one who not familiar with DSA? > Forget about DSA, what I'm trying to get at is that you might one day > read the release notes of the Linux kernel and see that it gained > support for FRER using tc, and get all excited, download and compile it, > set up 2 machines connected through 2 port pairs, and try to configure > the systems to ping each other redundantly, to become familiar with how > it works. Start with something simple, what can be so hard about a ping ;) > You'll say something along the lines of > 1. ok, I have 2 IP addresses, so I need 2 streams, one A -> B and one B -> A Don't forget about the background traffic. Nothing will work if ARP is not working, and ARP packets have broadcast destination (they will not be identified as part of the streams). So besides the FRER forwarding, normal bridging should also be working! > 2. I want to use the null stream identification function (MAC DA, VLAN ID > for those following along) so I have to resolve each IP address to a > MAC address to use as a stream identifier, but how? since the 2 > Ethernet cards on each system have different MAC addresses. Anyway, > pick one and put the other card in promisc for now. TSN streams by definition are Layer 2, so we suppose that MAC addresses (and VLANs) are known. > 3. I have the MACs now, I want to configure the streams. The stream "A -> B" > needs to be configured for splitting on the first system, and for > sequence recovery on the second system. The stream "B -> A" needs to > be configured for recovery on the first system and for splitting on > the second. > 4. Let's start with splitting, this is just the "mirred egress mirror" > action, nothing FRER specific about it. There's also the "frer rtag > tag-action tag-push" action which adds the redundancy tag. Good thing > these actions can be chained. So let's put a filter on the egress > qdisc of eth0, that matches on the MAC address of B, and has a mirred > mirror action to eth1, and a "rtag tag-push" action. Notice how by > this time, eth0 becomes sort of a "primary" interface and eth1 sort > of a "secondary" interface. So if you ping, you need to use eth0. > What if the link goes down on eth0 you ask, how does the "redundancy" > in "frer" come into play, with the traffic still going through eth1? > No time to ask questions like that, let's move on. Well, there should be no "primary" or "secondary". The two interfaces should be equally handled, otherwise a lot of other issues appear... > 5. Let's say that both links are up, and system B is receiving a > replicated stream with FRER tags on both eth0 and eth1. It wants to > eliminate the duplicates and see a continuous flow of ICMP requests > without the extra FRER tag. Back to the documentation. We see 2 kinds > of stream recovery, one is "individual" recovery which is a > "frer rtag recover" action put on the ingress qdisc of an interface, > and the other is just "recovery", which is the same action but put on > the egress qdisc. We don't want individual sequence recovery processes > on eth0 and eth1 of station B, since those won't consider the packets > as being members of the same stream, and the'll still be duplicated. > So we want the normal recovery. But on whose netdev's egress qdisc do > we put the "rtag recover" action? Both eth0 and eth1 are receiving. > There is no central convergence point. > Now you're stumped and thinking, how is this supposed to be used? > What can you do with it? I mean, I can probably create a veth pair as > that aforementioned missing convergence point, and guide packets from > {eth0, eth1} towards the lefthand side of the veth pair, using mirred > redirect. > Then I can put the frer rules on the egress qdisc of the lefthand side > of the veth pair, and recover the plaintext traffic (no duplicates, no > RTAG) on the righthand side of the veth pair. But... seriously? > And there is not even one mention of this in the documentation? > And even so. You need to send the request through eno0 and expect to > receive the reply through a veth interface? How is any user space > application ever going to work? We definitely need a convergence point for the elimination part. Frames from both redundant paths should be received by the elimination function. A virtual interface (like tap0 ) or a special netdevice could be the convergence point. > Now comes the connection with DSA. Xiaoliang made tc-frer with LS1028A > offloading in mind. No criticism there, after all it is the hardware we > are working with. > The intended usage pattern is to put the FRER rules on the switch port > netdevices, and to do the termination on the switch-unaware netdevices. > In other words, it's as if eno2 is connected to a completely external > RedBox, and tc-frer only serves externally received traffic. Except that > those 2 isolated parts of the system are physically embedded in one. > So at step (1) you put the IP on eno2, at step (2) you choose the MAC > address for the stream to be that of eno2, at step (4) you configure the > split action (mirred towards the external ports, plus FRER tag push) on > the _ingress_ of swp4 (traffic sent by eno2 is received by swp4). > At step (5) you put the sequence recovery on the _egress_ of swp4 > (traffic that egresses swp4 ingresses eno2). > So then you might ask, what would we do if we didn't have that eno2 <-> > swp4 port pair? Is tc-frer useful for someone who doesn't, but is maybe > even able to offload 802.1CB streams, including termination, through > some other paradigm? The thing is that, as far as I can tell, Linux does > not really like to set up a network for the exclusive use of others > (pure forwarding), to which it has no local access. This is essentially > the design of tc-frer, and my issue with it. I think the DSA case is a special one, and the solution should also work having just two NICs. Besides having a tap/veth port we can also put the replication/ elimination point to the bridge. Of course, in that case the we don't need a virtual interface, but instead we are tied to the Linux bridge. The HW offload could also work - but a bit differently. I'm not saying this is a better solution, but it is an other way to implement FRER, and it has the advantage of handling the background traffic as well. >> > >> > What we do in this case is put tc-frer rules for stream replication and >> > recovery on swp4 itself, and we use eno2 as the convergence point for >> > locally terminated streams. >> > >> > However, naturally, a hardware design that does not look like this can't >> > terminate traffic like this. >> >> Yes, this is my concern too. What would be a nice to have thing if the >> user can configure the SW implementation and the HW offload with the >> same commands and the original tc-frer approach fits well to this >> concept. Anything towards that direction is the way forward IMO, even if >> the underlying implementation will change. >> > >> > My idea was that it might be better if FRER was its own virtual network >> > interface (like a bridge), with multiple slave interfaces. The FRER net >> > device could keep its own database of streams and actions (completely >> > outside of tc) which would be managed similar to "bridge fdb add ...". >> > This way, the frer0 netdevice would be the local termination endpoint, >> > logically speaking. >> >> Interesting approach. To be honest I dont see the long term implications >> of this solution, others might have ideas about the pros and cons, but >> that looks like a solution where local stream termination is trivial. > The implication is that you can easily do stuff with FRER. Maybe I'm > relying too much on ping as an example, but I am really lacking real > life use cases. Feedback here would be extremely appreciated. Ping is OK as test traffic, but probably in real life you can expect VLAN tagged traffic. Also, as I mentioned ARP is not part of the TSN stream, but it should go through. Just like for ping. >> > What I don't know for sure is if a FRER netdevice is supposed to forward >> > frames which aren't in its list of streams (and if so, by which rules). >> >> Yes this sounds correct, somehow non-local packets should be forwarded >> too with a bridge. Is it possible to the linux bridge recognize if one >> port is a frer0 port (or on the frer0 if that is enslaved) and do the >> forwarding of the streams? Re-implementing bridge functions just for the >> frer device would be redundant. Unfortunately I never dug myself deep >> enough into the linux bridge code, just when debugged VXLAN ARP >> suppression for EVPN, but I think it would be possible to exchange some >> metadatas between the bridge and the frer device to do the >> forwarding/terminating decision, something like here [0] I think FRER frames need to be handled before entering the bridge. Or if the bridge does the FRER, then the first thing is to identify and handle the FRER streams... > The other question if you're in favor of "FRER as net device" is whether > we should have a FRER interface per TSN stream (or per stream pair, RX > and TX, since streams are unidirectional), or a FRER interface for all > TSN streams. If the latter, we're moving more towards "FRER integrated > in bridge" territory. Or... maybe even resolve local termination through > some other mechanism, and still build on top of a tc-frer action. We don't need FRER device per stream. However, per-stream state needs to be maintained. If we have a FRER netdevice, we have to assign the related interfaces as "slave" ports. These ports will have to identify the FRER streams and forward them to the FRER netdevice, and the rest of the traffic needs to be handled normally. So besides the FRER netdevice we can still have a bridge running to handle the other traffic. On the other hand, we can still have multiple FRER netdevices, if needed, and we can assign different slave interfaces to the different FRER netdevices. > The thing with "FRER as net device" on the other hand is that we've > already started modeling PSFP through tc. So if the FRER device has its > own rules, then "these" streams are not the same as "those" streams, and > a user would have to duplicate parts of the configuration. Whereas I > think the PSFP standard refers to stream identifiers directly from 802.1CB. Unfortunately this is true, we have to configure the filtering at the ingress interface, and further configuration is needed at the FRER netdevice. >> > Because if a FRER netdevice is supposed to behave like a regular bridge >> > for non-streams, the implication is that the FRER logic should then be >> > integrated into the Linux bridge. >> >> This is (for me) more appealing. Also we can keep that in mind when >> Linux will support deterministic layer3 networking (IETF DetNet WG RFCs) >> it would be nice to have mapping between TSN and DetNet streams, then >> forward the packets on DetNet tunnels as well (with different >> endpoints). This is something our team researching so Balazs and Istvan >> might give you some info about that. But I admit that thinking about >> playing nicely with DetNet in regard of the current linux FRER >> implementation is more than overwhelming, but the Linux bridge would be >> a nice place to map TSN flows to DetNet flow like currently EVPN maps >> VLANs to VXLANs. Having FRER in the bridge also has some disadvantages. What if I want to use openvswitch? Netdevice based FRER can work with Linux bridge and openvswitch too. > So what would be the use case for bridging packets belonging to > unrecognized TSN streams? In my toy setups I almost ran out of ideas how > to drop unwanted traffic and prevent it from being looped forever. > STP, MSTP, MRP are all out the window, this is active redundancy, you > need to embrace the loops, so it isn't as if you can pretend that > something sane is going to happen with a packet if it isn't part of a > stream that gets special handling from 802.1CB. No broadcast, no > multicast, and self address filtering on all switch ports. It is really important that FRER streams need to be identified immediately and not handled as normal traffic. If we identify the FRER streams first, and handle them, then we can avoid the loops. So it is not enough to use a hook, we need to prevent further processing of FRER frames. The rest of the traffic can be handled normally, even broadcast/multicast. The unrecognized TSN streams may cause loop, that's true, but I think that is a misconfiguration, and can not be avoided. >> > Also, this new FRER software model complicates the offloading on NXP >> > LS1028A, but let's leave that aside, since it shouldn't really be the >> > decisive factor on what should the software model look like. >> > >> > Do you have any comments on this topic? >> I would like to see if others can join to the discussion as well, I will >> try to think about this problem more too. >> >> [0] https://lore.kernel.org/netdev/20220301050439.31785-10-roopa@nvidia.com/ >> >> Best, >> Ferenc
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3961461d9c8b..cfa9b69cec69 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -148,6 +148,7 @@ enum flow_action_id { FLOW_ACTION_MPLS_MANGLE, FLOW_ACTION_GATE, FLOW_ACTION_PPPOE_PUSH, + FLOW_ACTION_FRER, NUM_FLOW_ACTIONS, }; @@ -278,6 +279,14 @@ struct flow_action_entry { struct { /* FLOW_ACTION_PPPOE_PUSH */ u16 sid; } pppoe; + struct { + u8 tag_type; + u8 tag_action; + u8 recover; + u8 rcvy_alg; + u8 rcvy_history_len; + u8 rcvy_reset_msec; + } frer; }; struct flow_action_cookie *cookie; /* user defined action cookie */ }; diff --git a/include/net/tc_act/tc_frer.h b/include/net/tc_act/tc_frer.h new file mode 100644 index 000000000000..b2ad2b2a3fe1 --- /dev/null +++ b/include/net/tc_act/tc_frer.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright 2021 NXP */ + +#ifndef __NET_TC_FRER_H +#define __NET_TC_FRER_H + +#include <net/act_api.h> +#include <linux/tc_act/tc_frer.h> + +struct tcf_frer; + +struct tcf_frer_proto_ops { + int (*encode)(struct sk_buff *skb, struct tcf_frer *frer_act); + int (*decode)(struct sk_buff *skb); + void (*tag_pop)(struct sk_buff *skb, struct tcf_frer *frer_act); +}; + +struct tcf_frer { + struct tc_action common; + u8 tag_type; + u8 tag_action; + u8 recover; + u8 rcvy_alg; + u8 rcvy_history_len; + u64 rcvy_reset_msec; + u32 gen_seq_num; + u32 rcvy_seq_num; + u64 seq_space; + u32 seq_history; + bool take_any; + bool rcvy_take_noseq; + u32 cps_seq_rcvy_lost_pkts; + u32 cps_seq_rcvy_tagless_pkts; + u32 cps_seq_rcvy_out_of_order_pkts; + u32 cps_seq_rcvy_rogue_pkts; + u32 cps_seq_rcvy_resets; + struct hrtimer hrtimer; + const struct tcf_frer_proto_ops *proto_ops; +}; + +#define to_frer(a) ((struct tcf_frer *)a) + +static inline bool is_tcf_frer(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->id == TCA_ID_FRER) + return true; +#endif + return false; +} + +#endif diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 5f589c7a8382..812aa75f7f23 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -114,6 +114,7 @@ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ +#define ETH_P_RTAG 0xF1C1 /* Redundancy Tag(IEEE 802.1CB) */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 6836ccb9c45d..a3fc0c478a65 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -136,6 +136,7 @@ enum tca_id { TCA_ID_MPLS, TCA_ID_CT, TCA_ID_GATE, + TCA_ID_FRER, /* other actions go here */ __TCA_ID_MAX = 255 }; diff --git a/include/uapi/linux/tc_act/tc_frer.h b/include/uapi/linux/tc_act/tc_frer.h new file mode 100644 index 000000000000..cd86274483e7 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_frer.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* Copyright 2021 NXP */ + +#ifndef __LINUX_TC_FRER_H +#define __LINUX_TC_FRER_H + +#include <linux/pkt_cls.h> + +struct tc_frer { + tc_gen; +}; + +enum { + TCA_FRER_UNSPEC, + TCA_FRER_TM, + TCA_FRER_PARMS, + TCA_FRER_PAD, + TCA_FRER_TAG_TYPE, + TCA_FRER_TAG_ACTION, + TCA_FRER_RECOVER, + TCA_FRER_RECOVER_ALG, + TCA_FRER_RECOVER_HISTORY_LEN, + TCA_FRER_RECOVER_RESET_TM, + TCA_FRER_RECOVER_TAGLESS_PKTS, + TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS, + TCA_FRER_RECOVER_ROGUE_PKTS, + TCA_FRER_RECOVER_LOST_PKTS, + TCA_FRER_RECOVER_RESETS, + __TCA_FRER_MAX, +}; +#define TCA_FRER_MAX (__TCA_FRER_MAX - 1) + +enum tc_frer_tag_action { + TCA_FRER_TAG_NULL, + TCA_FRER_TAG_PUSH, + TCA_FRER_TAG_POP, +}; + +enum tc_frer_tag_type { + TCA_FRER_TAG_RTAG, + TCA_FRER_TAG_HSR, + TCA_FRER_TAG_PRP, +}; + +enum tc_frer_rcvy_alg { + TCA_FRER_RCVY_VECTOR_ALG, + TCA_FRER_RCVY_MATCH_ALG, +}; + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 1e8ab4749c6c..93e2687042c2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -997,6 +997,19 @@ config NET_ACT_GATE To compile this code as a module, choose M here: the module will be called act_gate. +config NET_ACT_FRER + tristate "Frame frer tc action" + depends on NET_CLS_ACT + help + Say Y here to support frame replication and elimination for + reliability, which is defined by IEEE 802.1CB. + This action allow to add a frer tag. It also allow to remove + the frer tag and drop repeat frames. + + If unsure, say N. + To compile this code as a module, choose M here: the + module will be called act_frer. + config NET_IFE_SKBMARK tristate "Support to encoding decoding skb mark on IFE action" depends on NET_ACT_IFE diff --git a/net/sched/Makefile b/net/sched/Makefile index dd14ef413fda..69e7e94be567 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_ACT_CT) += act_ct.o obj-$(CONFIG_NET_ACT_GATE) += act_gate.o +obj-$(CONFIG_NET_ACT_FRER) += act_frer.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_frer.c b/net/sched/act_frer.c new file mode 100644 index 000000000000..6f8ec5782d3d --- /dev/null +++ b/net/sched/act_frer.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Copyright 2021 NXP */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <net/act_api.h> +#include <net/netlink.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_frer.h> + +#define FRER_SEQ_SPACE 16 +#define FRER_RCVY_RESET_MSEC 100 +#define FRER_RCVY_INVALID_SEQ 0x100 +#define FRER_RCVY_PASSED 0 +#define FRER_RCVY_DISCARDED -1 + +static unsigned int frer_net_id; +static struct tc_action_ops act_frer_ops; + +struct r_tag { + __be16 reserved; + __be16 sequence_nr; + __be16 encap_proto; +} __packed; + +struct rtag_ethhdr { + struct ethhdr ethhdr; + struct r_tag h_rtag; +} __packed; + +struct rtag_vlan_ethhdr { + struct vlan_ethhdr vlanhdr; + struct r_tag h_rtag; +} __packed; + +static const struct nla_policy frer_policy[TCA_FRER_MAX + 1] = { + [TCA_FRER_PARMS] = + NLA_POLICY_EXACT_LEN(sizeof(struct tc_frer)), + [TCA_FRER_TAG_TYPE] = { .type = NLA_U8 }, + [TCA_FRER_TAG_ACTION] = { .type = NLA_U8 }, + [TCA_FRER_RECOVER] = { .type = NLA_U8 }, + [TCA_FRER_RECOVER_ALG] = { .type = NLA_U8 }, + [TCA_FRER_RECOVER_HISTORY_LEN] = { .type = NLA_U8 }, + [TCA_FRER_RECOVER_RESET_TM] = { .type = NLA_U64 }, +}; + +static void frer_seq_recovery_reset(struct tcf_frer *frer_act); + +static enum hrtimer_restart frer_hrtimer_func(struct hrtimer *timer) +{ + struct tcf_frer *frer_act = container_of(timer, struct tcf_frer, + hrtimer); + ktime_t remaining_tm; + + frer_seq_recovery_reset(frer_act); + + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000); + + hrtimer_forward(timer, timer->base->get_time(), remaining_tm); + + return HRTIMER_RESTART; +} + +static int frer_rtag_decode(struct sk_buff *skb) +{ + struct rtag_vlan_ethhdr *rtag_vlan_hdr; + struct rtag_ethhdr *rtag_hdr; + struct vlan_ethhdr *vlanhdr; + struct ethhdr *ethhdr; + struct r_tag *rtag; + bool is_vlan; + u16 sequence; + u16 proto; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + proto = ethhdr->h_proto; + is_vlan = false; + + if (proto == htons(ETH_P_8021Q)) { + vlanhdr = (struct vlan_ethhdr *)ethhdr; + proto = vlanhdr->h_vlan_encapsulated_proto; + is_vlan = true; + } + + if (proto != htons(ETH_P_RTAG)) + return FRER_RCVY_INVALID_SEQ; + + if (is_vlan) { + rtag_vlan_hdr = (struct rtag_vlan_ethhdr *)ethhdr; + rtag = &rtag_vlan_hdr->h_rtag; + } else { + rtag_hdr = (struct rtag_ethhdr *)ethhdr; + rtag = &rtag_hdr->h_rtag; + } + + sequence = ntohs(rtag->sequence_nr); + + return sequence; +} + +static int frer_seq_generation_alg(struct tcf_frer *frer_act) +{ + u32 gen_seq_max = frer_act->seq_space - 1; + u32 gen_seq_num = frer_act->gen_seq_num; + int sequence_number; + + sequence_number = gen_seq_num; + + if (gen_seq_num >= gen_seq_max) + gen_seq_num = 0; + else + gen_seq_num++; + + frer_act->gen_seq_num = gen_seq_num; + + return sequence_number; +} + +static int frer_rtag_encode(struct sk_buff *skb, struct tcf_frer *frer_act) +{ + struct vlan_ethhdr *vlanhdr; + struct ethhdr *ethhdr; + struct r_tag *rtag; + int rtag_len, head_len; + unsigned char *dst, *src, *p; + __be16 *proto, proto_val; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { + vlanhdr = (struct vlan_ethhdr *)ethhdr; + p = (unsigned char *)(vlanhdr + 1); + proto = &vlanhdr->h_vlan_encapsulated_proto; + } else { + p = (unsigned char *)(ethhdr + 1); + proto = ðhdr->h_proto; + } + + proto_val = *proto; + *proto = htons(ETH_P_RTAG); + + src = skb_mac_header(skb); + head_len = p - src; + + rtag_len = sizeof(struct r_tag); + if (skb_cow_head(skb, rtag_len) < 0) + return -ENOMEM; + + skb_push(skb, rtag_len); + skb->mac_header -= rtag_len; + + dst = skb_mac_header(skb); + memmove(dst, src, head_len); + + rtag = (struct r_tag *)(dst + head_len); + rtag->encap_proto = proto_val; + rtag->sequence_nr = htons(frer_act->gen_seq_num); + rtag->reserved = 0; + + return 0; +} + +static void frer_rtag_pop(struct sk_buff *skb, struct tcf_frer *frer_act) +{ + struct vlan_ethhdr *vlanhdr; + struct ethhdr *ethhdr; + struct r_tag *rtag; + int rtag_len, head_len; + unsigned char *dst, *src, *p; + __be16 *proto; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { + vlanhdr = (struct vlan_ethhdr *)ethhdr; + p = (unsigned char *)(vlanhdr + 1); + proto = &vlanhdr->h_vlan_encapsulated_proto; + } else { + p = (unsigned char *)(ethhdr + 1); + proto = ðhdr->h_proto; + } + + if (*proto != htons(ETH_P_RTAG)) + return; + + rtag = (struct r_tag *)p; + rtag_len = sizeof(struct r_tag); + *proto = rtag->encap_proto; + + src = skb_mac_header(skb); + head_len = p - src; + + skb->data = skb_mac_header(skb); + skb_pull(skb, rtag_len); + + skb_reset_mac_header(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start += rtag_len; + + dst = skb_mac_header(skb); + memmove(dst, src, head_len); +} + +static const struct tcf_frer_proto_ops rtag_ops = { + .encode = frer_rtag_encode, + .decode = frer_rtag_decode, + .tag_pop = frer_rtag_pop, +}; + +static int tcf_frer_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, u32 flags, + struct netlink_ext_ack *extack) +{ + struct tc_action_net *tn = net_generic(net, frer_net_id); + struct nlattr *tb[TCA_FRER_MAX + 1]; + struct tcf_chain *goto_ch = NULL; + struct tcf_frer *frer_act; + struct tc_frer *parm; + int ret = 0, err, index; + ktime_t remaining_tm; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_FRER_MAX, nla, frer_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_FRER_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_FRER_PARMS]); + index = parm->index; + + err = tcf_idr_check_alloc(tn, &index, a, bind); + if (err < 0) + return err; + + if (err && bind) + return 0; + + if (!err) { + ret = tcf_idr_create(tn, index, est, a, + &act_frer_ops, bind, false, 0); + + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; + } + } else if (!ovr) { + tcf_idr_release(*a, bind); + return -EEXIST; + } + + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + + frer_act = to_frer(*a); + + spin_lock_bh(&frer_act->tcf_lock); + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + + frer_act->tag_type = nla_get_u8(tb[TCA_FRER_TAG_TYPE]); + frer_act->tag_action = nla_get_u8(tb[TCA_FRER_TAG_ACTION]); + frer_act->recover = nla_get_u8(tb[TCA_FRER_RECOVER]); + frer_act->rcvy_alg = nla_get_u8(tb[TCA_FRER_RECOVER_ALG]); + frer_act->rcvy_history_len = nla_get_u8(tb[TCA_FRER_RECOVER_HISTORY_LEN]); + frer_act->rcvy_reset_msec = nla_get_u64(tb[TCA_FRER_RECOVER_RESET_TM]); + + frer_act->gen_seq_num = 0; + frer_act->seq_space = 1 << FRER_SEQ_SPACE; + frer_act->rcvy_seq_num = 0; + frer_act->seq_history = 0xFFFFFFFF; + frer_act->rcvy_take_noseq = true; + + switch (frer_act->tag_type) { + case TCA_FRER_TAG_RTAG: + frer_act->proto_ops = &rtag_ops; + break; + case TCA_FRER_TAG_HSR: + case TCA_FRER_TAG_PRP: + default: + spin_unlock_bh(&frer_act->tcf_lock); + return -EOPNOTSUPP; + } + + if (frer_act->recover && frer_act->rcvy_reset_msec) { + hrtimer_init(&frer_act->hrtimer, CLOCK_TAI, + HRTIMER_MODE_REL_SOFT); + frer_act->hrtimer.function = frer_hrtimer_func; + + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000); + hrtimer_start(&frer_act->hrtimer, remaining_tm, + HRTIMER_MODE_REL_SOFT); + } + + spin_unlock_bh(&frer_act->tcf_lock); + + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + + return ret; + +release_idr: + tcf_idr_release(*a, bind); + return err; +} + +static void frer_seq_recovery_reset(struct tcf_frer *frer_act) +{ + spin_lock(&frer_act->tcf_lock); + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG) { + frer_act->rcvy_seq_num = frer_act->seq_space - 1; + frer_act->seq_history = 0; + } + frer_act->cps_seq_rcvy_resets++; + frer_act->take_any = true; + spin_unlock(&frer_act->tcf_lock); +} + +static void frer_shift_seq_history(int value, struct tcf_frer *frer_act) +{ + int history_len = frer_act->rcvy_history_len; + + if ((frer_act->seq_history & BIT(history_len - 1)) == 0) + frer_act->cps_seq_rcvy_lost_pkts++; + + frer_act->seq_history <<= 1; + + if (value) + frer_act->seq_history |= BIT(0); +} + +static int frer_vector_rcvy_alg(struct tcf_frer *frer_act, int sequence, + bool individual) +{ + struct hrtimer *timer = &frer_act->hrtimer; + bool reset_timer = false; + ktime_t remaining_tm; + int delta, ret; + + if (sequence == FRER_RCVY_INVALID_SEQ) { + frer_act->cps_seq_rcvy_tagless_pkts++; + if (frer_act->rcvy_take_noseq) { + reset_timer = true; + ret = FRER_RCVY_PASSED; + goto out; + } else { + return FRER_RCVY_DISCARDED; + } + } + + delta = (sequence - frer_act->rcvy_seq_num) & (frer_act->seq_space - 1); + /* -(RecovSeqSpace/2) <= delta <= ((RecovSeqSpace/2)-1) */ + if (delta & (frer_act->seq_space / 2)) + delta -= frer_act->seq_space; + + if (frer_act->take_any) { + frer_act->take_any = false; + frer_act->seq_history |= BIT(0); + frer_act->rcvy_seq_num = sequence; + + reset_timer = true; + ret = FRER_RCVY_PASSED; + goto out; + } + + if (delta >= frer_act->rcvy_history_len || + delta <= -frer_act->rcvy_history_len) { + /* Packet is out-of-range. */ + frer_act->cps_seq_rcvy_rogue_pkts++; + + if (individual) + reset_timer = true; + + ret = FRER_RCVY_DISCARDED; + goto out; + } else if (delta <= 0) { + /* Packet is old and in SequenceHistory. */ + if (frer_act->seq_history & BIT(-delta)) { + if (individual) + reset_timer = true; + + /* Packet has been seen. */ + ret = FRER_RCVY_DISCARDED; + goto out; + } else { + /* Packet has not been seen. */ + frer_act->seq_history |= BIT(-delta); + frer_act->cps_seq_rcvy_out_of_order_pkts++; + + reset_timer = true; + ret = FRER_RCVY_PASSED; + goto out; + } + } else { + /* Packet is not too far ahead of the one we want. */ + if (delta != 1) + frer_act->cps_seq_rcvy_out_of_order_pkts++; + + while (--delta) + frer_shift_seq_history(0, frer_act); + frer_shift_seq_history(1, frer_act); + frer_act->rcvy_seq_num = sequence; + + reset_timer = true; + ret = FRER_RCVY_PASSED; + goto out; + } +out: + if (reset_timer && frer_act->rcvy_reset_msec) { + remaining_tm = + (ktime_t)(frer_act->rcvy_reset_msec * 1000000); + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT); + } + + return ret; +} + +static int frer_match_rcvy_alg(struct tcf_frer *frer_act, int sequence, + bool individual) +{ + struct hrtimer *timer = &frer_act->hrtimer; + bool reset_timer = false; + ktime_t remaining_tm; + int delta, ret; + + if (sequence == FRER_RCVY_INVALID_SEQ) { + frer_act->cps_seq_rcvy_tagless_pkts++; + + return FRER_RCVY_PASSED; + } + + if (frer_act->take_any) { + frer_act->take_any = false; + frer_act->rcvy_seq_num = sequence; + + reset_timer = true; + ret = FRER_RCVY_PASSED; + goto out; + } + + delta = sequence - frer_act->rcvy_seq_num; + if (delta) { + /* Packet has not been seen, accept it. */ + if (delta != 1) + frer_act->cps_seq_rcvy_out_of_order_pkts++; + + frer_act->rcvy_seq_num = sequence; + + reset_timer = true; + ret = FRER_RCVY_PASSED; + goto out; + } else { + if (individual) + reset_timer = true; + + /* Packet has been seen. Do not forward. */ + ret = FRER_RCVY_DISCARDED; + goto out; + } + +out: + if (reset_timer && frer_act->rcvy_reset_msec) { + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000); + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT); + } + + return ret; +} + +static int tcf_frer_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_frer *frer_act = to_frer(a); + bool ingress, individual; + int ret, retval; + int sequence; + + tcf_lastuse_update(&frer_act->tcf_tm); + tcf_action_update_bstats(&frer_act->common, skb); + + retval = READ_ONCE(frer_act->tcf_action); + + sequence = frer_act->proto_ops->decode(skb); + + ingress = skb_at_tc_ingress(skb); + individual = ingress; + + if (frer_act->recover) { + spin_lock(&frer_act->tcf_lock); + + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG) + ret = frer_vector_rcvy_alg(frer_act, sequence, + individual); + else + ret = frer_match_rcvy_alg(frer_act, sequence, + individual); + if (ret) { + frer_act->tcf_qstats.drops++; + retval = TC_ACT_SHOT; + } + + if (frer_act->tag_action == TCA_FRER_TAG_POP) + frer_act->proto_ops->tag_pop(skb, frer_act); + + spin_unlock(&frer_act->tcf_lock); + + return retval; + } + + if (frer_act->tag_action == TCA_FRER_TAG_PUSH && + sequence == FRER_RCVY_INVALID_SEQ) { + spin_lock(&frer_act->tcf_lock); + + frer_seq_generation_alg(frer_act); + + frer_act->proto_ops->encode(skb, frer_act); + + spin_unlock(&frer_act->tcf_lock); + } + + return retval; +} + +static int tcf_frer_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_frer *frer_act = to_frer(a); + struct tc_frer opt = { + .index = frer_act->tcf_index, + .refcnt = refcount_read(&frer_act->tcf_refcnt) - ref, + .bindcnt = atomic_read(&frer_act->tcf_bindcnt) - bind, + }; + struct tcf_t t; + + spin_lock_bh(&frer_act->tcf_lock); + opt.action = frer_act->tcf_action; + + if (nla_put(skb, TCA_FRER_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (nla_put_u8(skb, TCA_FRER_TAG_TYPE, frer_act->tag_type)) + goto nla_put_failure; + + if (nla_put_u8(skb, TCA_FRER_TAG_ACTION, frer_act->tag_action)) + goto nla_put_failure; + + if (nla_put_u8(skb, TCA_FRER_RECOVER, frer_act->recover)) + goto nla_put_failure; + + if (nla_put_u8(skb, TCA_FRER_RECOVER_ALG, frer_act->rcvy_alg)) + goto nla_put_failure; + + if (nla_put_u8(skb, TCA_FRER_RECOVER_HISTORY_LEN, + frer_act->rcvy_history_len)) + goto nla_put_failure; + + if (nla_put_u64_64bit(skb, TCA_FRER_RECOVER_RESET_TM, + frer_act->rcvy_reset_msec, TCA_FRER_PAD)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_FRER_RECOVER_TAGLESS_PKTS, + frer_act->cps_seq_rcvy_tagless_pkts)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS, + frer_act->cps_seq_rcvy_out_of_order_pkts)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_FRER_RECOVER_ROGUE_PKTS, + frer_act->cps_seq_rcvy_rogue_pkts)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_FRER_RECOVER_LOST_PKTS, + frer_act->cps_seq_rcvy_lost_pkts)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_FRER_RECOVER_RESETS, + frer_act->cps_seq_rcvy_resets)) + goto nla_put_failure; + + tcf_tm_dump(&t, &frer_act->tcf_tm); + if (nla_put_64bit(skb, TCA_FRER_TM, sizeof(t), + &t, TCA_FRER_PAD)) + goto nla_put_failure; + spin_unlock_bh(&frer_act->tcf_lock); + + return skb->len; + +nla_put_failure: + spin_unlock_bh(&frer_act->tcf_lock); + nlmsg_trim(skb, b); + + return -1; +} + +static int tcf_frer_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) +{ + struct tc_action_net *tn = net_generic(net, frer_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops, extack); +} + +static int tcf_frer_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, frer_net_id); + + return tcf_idr_search(tn, a, index); +} + +static void tcf_frer_stats_update(struct tc_action *a, u64 bytes, u64 packets, + u64 drops, u64 lastuse, bool hw) +{ + struct tcf_frer *frer_act = to_frer(a); + struct tcf_t *tm = &frer_act->tcf_tm; + + tcf_action_update_stats(a, bytes, packets, drops, hw); + tm->lastuse = max_t(u64, tm->lastuse, lastuse); +} + +static void tcf_frer_cleanup(struct tc_action *a) +{ + struct tcf_frer *frer_act = to_frer(a); + + if (frer_act->rcvy_reset_msec) + hrtimer_cancel(&frer_act->hrtimer); +} + +static size_t tcf_frer_get_fill_size(const struct tc_action *act) +{ + return nla_total_size(sizeof(struct tc_frer)); +} + +static struct tc_action_ops act_frer_ops = { + .kind = "frer", + .id = TCA_ID_FRER, + .owner = THIS_MODULE, + .act = tcf_frer_act, + .init = tcf_frer_init, + .cleanup = tcf_frer_cleanup, + .dump = tcf_frer_dump, + .walk = tcf_frer_walker, + .stats_update = tcf_frer_stats_update, + .get_fill_size = tcf_frer_get_fill_size, + .lookup = tcf_frer_search, + .size = sizeof(struct tcf_frer), +}; + +static __net_init int frer_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, frer_net_id); + + return tc_action_net_init(net, tn, &act_frer_ops); +} + +static void __net_exit frer_exit_net(struct list_head *net_list) +{ + tc_action_net_exit(net_list, frer_net_id); +}; + +static struct pernet_operations frer_net_ops = { + .init = frer_init_net, + .exit_batch = frer_exit_net, + .id = &frer_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init frer_init_module(void) +{ + return tcf_register_action(&act_frer_ops, &frer_net_ops); +} + +static void __exit frer_cleanup_module(void) +{ + tcf_unregister_action(&act_frer_ops, &frer_net_ops); +} + +module_init(frer_init_module); +module_exit(frer_cleanup_module); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ef8f5a6205a..353184987427 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -39,6 +39,7 @@ #include <net/tc_act/tc_ct.h> #include <net/tc_act/tc_mpls.h> #include <net/tc_act/tc_gate.h> +#include <net/tc_act/tc_frer.h> #include <net/flow_offload.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -3706,6 +3707,16 @@ int tc_setup_flow_action(struct flow_action *flow_action, err = tcf_gate_get_entries(entry, act); if (err) goto err_out_locked; + } else if (is_tcf_frer(act)) { + entry->id = FLOW_ACTION_FRER; + entry->frer.tag_type = to_frer(act)->tag_type; + entry->frer.tag_action = to_frer(act)->tag_action; + entry->frer.recover = to_frer(act)->recover; + entry->frer.rcvy_alg = to_frer(act)->rcvy_alg; + entry->frer.rcvy_history_len = + to_frer(act)->rcvy_history_len; + entry->frer.rcvy_reset_msec = + to_frer(act)->rcvy_reset_msec; } else { err = -EOPNOTSUPP; goto err_out_locked;