Message ID | b231c7d0acacd702284158cd44734e72ef661a01.1673423199.git.william.xuanziyang@huawei.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room() | expand |
On Wed, Jan 11, 2023 at 3:01 AM Ziyang Xuan <william.xuanziyang@huawei.com> wrote: > > Add ipip6 and ip6ip decap support for bpf_skb_adjust_room(). > Main use case is for using cls_bpf on ingress hook to decapsulate > IPv4 over IPv6 and IPv6 over IPv4 tunnel packets. > > Add two new flags BPF_F_ADJ_ROOM_DECAP_L3_IPV{4,6} to indicate the > new IP header version after decapsulating the outer IP header. > > Suggested-by: Willem de Bruijn <willemb@google.com> > Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> > --- > include/uapi/linux/bpf.h | 8 ++++++++ > net/core/filter.c | 26 +++++++++++++++++++++++++- > tools/include/uapi/linux/bpf.h | 8 ++++++++ > 3 files changed, 41 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 464ca3f01fe7..dde1c2ea1c84 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2644,6 +2644,12 @@ union bpf_attr { > * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the > * L2 type as Ethernet. > * > + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**, > + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**: > + * Indicate the new IP header version after decapsulating the > + * outer IP header. Mainly used in scenarios that the inner and > + * outer IP versions are different. > + * Nit (only since I have another comment below) Indicate -> Set [Mainly used .. 
that] -> [Used when] > if (skb_is_gso(skb)) { > struct skb_shared_info *shinfo = skb_shinfo(skb); > > @@ -3596,6 +3609,10 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, > if (unlikely(proto != htons(ETH_P_IP) && > proto != htons(ETH_P_IPV6))) > return -ENOTSUPP; > + if (unlikely((shrink && ((flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) == > + BPF_F_ADJ_ROOM_DECAP_L3_MASK)) || (!shrink && > + flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK))) > + return -EINVAL; > > off = skb_mac_header_len(skb); > switch (mode) { > @@ -3608,6 +3625,13 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, > return -ENOTSUPP; > } > > + if (shrink) { > + if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) > + len_min = sizeof(struct ipv6hdr); > + else if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) > + len_min = sizeof(struct iphdr); > + } > + How about combining this branch with the above: if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) { if (!shrink) return -EINVAL; switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) { case BPF_F_ADJ_ROOM_DECAP_L3_IPV4: len_min = sizeof(struct iphdr); break; case BPF_F_ADJ_ROOM_DECAP_L3_IPV6: len_min = sizeof(struct ipv6hdr); break; default: return -EINVAL; }
On 1/11/23 12:01 AM, Ziyang Xuan wrote: > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 464ca3f01fe7..dde1c2ea1c84 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2644,6 +2644,12 @@ union bpf_attr { > * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the > * L2 type as Ethernet. > * > + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**, > + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**: > + * Indicate the new IP header version after decapsulating the > + * outer IP header. Mainly used in scenarios that the inner and > + * outer IP versions are different. > + * selftests/bpf failed to compile. It is probably because there are leading spaces instead of tabs: https://github.com/kernel-patches/bpf/actions/runs/3890850490/jobs/6640395038#step:11:112 /tmp/work/bpf/bpf/tools/testing/selftests/bpf/bpf-helpers.rst:1112: (WARNING/2) Bullet list ends without a blank line; unexpected unindent. make[1]: *** [Makefile.docs:76: /tmp/work/bpf/bpf/tools/testing/selftests/bpf/bpf-helpers.7] Error 12 make: *** [Makefile:259: docs] Error 2
> On Wed, Jan 11, 2023 at 3:01 AM Ziyang Xuan > <william.xuanziyang@huawei.com> wrote: >> >> Add ipip6 and ip6ip decap support for bpf_skb_adjust_room(). >> Main use case is for using cls_bpf on ingress hook to decapsulate >> IPv4 over IPv6 and IPv6 over IPv4 tunnel packets. >> >> Add two new flags BPF_F_ADJ_ROOM_DECAP_L3_IPV{4,6} to indicate the >> new IP header version after decapsulating the outer IP header. >> >> Suggested-by: Willem de Bruijn <willemb@google.com> >> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> >> --- >> include/uapi/linux/bpf.h | 8 ++++++++ >> net/core/filter.c | 26 +++++++++++++++++++++++++- >> tools/include/uapi/linux/bpf.h | 8 ++++++++ >> 3 files changed, 41 insertions(+), 1 deletion(-) >> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index 464ca3f01fe7..dde1c2ea1c84 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -2644,6 +2644,12 @@ union bpf_attr { >> * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the >> * L2 type as Ethernet. >> * >> + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**, >> + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**: >> + * Indicate the new IP header version after decapsulating the >> + * outer IP header. Mainly used in scenarios that the inner and >> + * outer IP versions are different. >> + * > > Nit (only since I have another comment below) > > Indicate -> Set Sorry, I think "Indicate" may be more suitable, because the new IP header is the original inner IP header; it is not changed. The flags only help the kernel complete specific tasks correctly. I think "Set" implies that something is changed. > [Mainly used .. that] -> [Used when] This looks good to me. Thanks! 
> >> if (skb_is_gso(skb)) { >> struct skb_shared_info *shinfo = skb_shinfo(skb); >> >> @@ -3596,6 +3609,10 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, >> if (unlikely(proto != htons(ETH_P_IP) && >> proto != htons(ETH_P_IPV6))) >> return -ENOTSUPP; >> + if (unlikely((shrink && ((flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) == >> + BPF_F_ADJ_ROOM_DECAP_L3_MASK)) || (!shrink && >> + flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK))) >> + return -EINVAL; >> >> off = skb_mac_header_len(skb); >> switch (mode) { >> @@ -3608,6 +3625,13 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, >> return -ENOTSUPP; >> } >> >> + if (shrink) { >> + if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) >> + len_min = sizeof(struct ipv6hdr); >> + else if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) >> + len_min = sizeof(struct iphdr); >> + } >> + > > How about combining this branch with the above: > > if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) { > if (!shrink) > return -EINVAL; > > switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) { > case BPF_F_ADJ_ROOM_DECAP_L3_IPV4: > len_min = sizeof(struct iphdr); > break; > case BPF_F_ADJ_ROOM_DECAP_L3_IPV6: > len_min = sizeof(struct ipv6hdr); > break; > default: > return -EINVAL; > } > This looks good to me. Thanks! >
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 464ca3f01fe7..dde1c2ea1c84 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2644,6 +2644,12 @@ union bpf_attr { * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the * L2 type as Ethernet. * + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**, + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**: + * Indicate the new IP header version after decapsulating the + * outer IP header. Mainly used in scenarios that the inner and + * outer IP versions are different. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -5803,6 +5809,8 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), + BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7), + BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8), }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index 43cc1fe58a2c..5fb113953f80 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3381,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) #define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) +#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \ + BPF_F_ADJ_ROOM_DECAP_L3_IPV6) + #define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \ BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ - BPF_ADJ_ROOM_ENCAP_L2_MASK)) + BPF_ADJ_ROOM_ENCAP_L2_MASK) | \ + BPF_F_ADJ_ROOM_DECAP_L3_MASK) static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, u64 flags) @@ -3501,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, int ret; if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO | + 
BPF_F_ADJ_ROOM_DECAP_L3_MASK | BPF_F_ADJ_ROOM_NO_CSUM_RESET))) return -EINVAL; @@ -3519,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, if (unlikely(ret < 0)) return ret; + /* Match skb->protocol to new outer l3 protocol */ + if (skb->protocol == htons(ETH_P_IP) && + flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) + skb->protocol = htons(ETH_P_IPV6); + else if (skb->protocol == htons(ETH_P_IPV6) && + flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) + skb->protocol = htons(ETH_P_IP); + if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -3596,6 +3609,10 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, if (unlikely(proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6))) return -ENOTSUPP; + if (unlikely((shrink && ((flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) == + BPF_F_ADJ_ROOM_DECAP_L3_MASK)) || (!shrink && + flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK))) + return -EINVAL; off = skb_mac_header_len(skb); switch (mode) { @@ -3608,6 +3625,13 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, return -ENOTSUPP; } + if (shrink) { + if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) + len_min = sizeof(struct ipv6hdr); + else if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) + len_min = sizeof(struct iphdr); + } + len_cur = skb->len - skb_network_offset(skb); if ((shrink && (len_diff_abs >= len_cur || len_cur - len_diff_abs < len_min)) || diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 464ca3f01fe7..22672e5c8466 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2644,6 +2644,12 @@ union bpf_attr { * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the * L2 type as Ethernet. * + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**, + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**: + * Indicate the new IP header version after decapsulating the + * outer IP header. Mainly used in scenarios that the inner and + * outer IP versions are different. 
+ * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -5803,6 +5809,8 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), + BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7), + BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8), }; enum {
Add ipip6 and ip6ip decap support for bpf_skb_adjust_room(). The main use case is using cls_bpf on the ingress hook to decapsulate IPv4 over IPv6 and IPv6 over IPv4 tunnel packets. Add two new flags BPF_F_ADJ_ROOM_DECAP_L3_IPV{4,6} to indicate the new IP header version after decapsulating the outer IP header. Suggested-by: Willem de Bruijn <willemb@google.com> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> --- include/uapi/linux/bpf.h | 8 ++++++++ net/core/filter.c | 26 +++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 8 ++++++++ 3 files changed, 41 insertions(+), 1 deletion(-)