Message ID | 20240703104640.20878-1-fw@strlen.de (mailing list archive) |
---|---|
State | Accepted |
Commit | c7f79f2620b7776586c626edf21eb6ed6ed3d1eb |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next] openvswitch: prepare for stolen verdict coming from conntrack and nat engine | expand |
Hi Florian, Florian Westphal <fw@strlen.de> writes: > At this time, conntrack either returns NF_ACCEPT or NF_DROP. > To improve debuging it would be nice to be able to replace NF_DROP > verdict with NF_DROP_REASON() helper, > > This helper releases the skb instantly (so drop_monitor can pinpoint > precise location) and returns NF_STOLEN. > > Prepare call sites to deal with this before introducing such changes > in conntrack and nat core. > > Signed-off-by: Florian Westphal <fw@strlen.de> > --- AFAIU, these changes are only impacting the existing NF_DROP cases, and won't impact how ovs + netfilter communicate about invalid packets. One important thing to note is that we rely on: * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be * set to NULL and 0 will be returned. Based on this, my understanding is if packet isn't part of a valid connection, skb->_nfct is NULL and NF_ACCEPT is returned. If this changes, those flow pipelines matching on ct_state(+inv+trk) will no longer function as expected since we will bail early. I think this comment will also apply to the act_ct change as well. 
> net/openvswitch/conntrack.c | 47 +++++++++++++++++++++++++++++-------- > 1 file changed, 37 insertions(+), 10 deletions(-) > > diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c > index 3b980bf2770b..8eb1d644b741 100644 > --- a/net/openvswitch/conntrack.c > +++ b/net/openvswitch/conntrack.c > @@ -679,6 +679,8 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, > action |= BIT(NF_NAT_MANIP_DST); > > err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit); > + if (err != NF_ACCEPT) > + return err; > > if (action & BIT(NF_NAT_MANIP_SRC)) > ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC); > @@ -697,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, > } > #endif > > +static int verdict_to_errno(unsigned int verdict) > +{ > + switch (verdict & NF_VERDICT_MASK) { > + case NF_ACCEPT: > + return 0; > + case NF_DROP: > + return -EINVAL; > + case NF_STOLEN: > + return -EINPROGRESS; > + default: > + break; > + } > + > + return -EINVAL; > +} > + > /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if > * not done already. Update key with new CT state after passing the packet > * through conntrack. > @@ -735,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, > > err = nf_conntrack_in(skb, &state); > if (err != NF_ACCEPT) > - return -ENOENT; > + return verdict_to_errno(err); > > /* Clear CT state NAT flags to mark that we have not yet done > * NAT after the nf_conntrack_in() call. We can actually clear > @@ -762,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, > * the key->ct_state. 
> */ > if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) && > - (nf_ct_is_confirmed(ct) || info->commit) && > - ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { > - return -EINVAL; > + (nf_ct_is_confirmed(ct) || info->commit)) { > + int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo); > + > + err = verdict_to_errno(err); > + if (err) > + return err; > } > > /* Userspace may decide to perform a ct lookup without a helper > @@ -795,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, > * - When committing an unconfirmed connection. > */ > if ((nf_ct_is_confirmed(ct) ? !cached || add_helper : > - info->commit) && > - nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) { > - return -EINVAL; > + info->commit)) { > + int err = nf_ct_helper(skb, ct, ctinfo, info->family); > + > + err = verdict_to_errno(err); > + if (err) > + return err; > } > > if (nf_ct_protonum(ct) == IPPROTO_TCP && > @@ -1001,10 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, > /* This will take care of sending queued events even if the connection > * is already confirmed. > */ > - if (nf_conntrack_confirm(skb) != NF_ACCEPT) > - return -EINVAL; > + err = nf_conntrack_confirm(skb); > > - return 0; > + return verdict_to_errno(err); > } > > /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero > @@ -1039,6 +1062,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, > else > err = ovs_ct_lookup(net, key, info, skb); > > + /* conntrack core returned NF_STOLEN */ > + if (err == -EINPROGRESS) > + return err; > + > skb_push_rcsum(skb, nh_ofs); > if (err) > ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
Aaron Conole <aconole@redhat.com> wrote: > > verdict with NF_DROP_REASON() helper, > > > > This helper releases the skb instantly (so drop_monitor can pinpoint > > precise location) and returns NF_STOLEN. > > > > Prepare call sites to deal with this before introducing such changes > > in conntrack and nat core. > > > > Signed-off-by: Florian Westphal <fw@strlen.de> > > --- > > AFAIU, these changes are only impacting the existing NF_DROP cases, and > won't impact how ovs + netfilter communicate about invalid packets. One > important thing to note is that we rely on: > > * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be > * set to NULL and 0 will be returned. Right, this is about how to communicate 'packet dropped'. NF_DROP means 'please call kfree_skb for me'. Problem from introspection point of view is that drop monitor will blame nf_hook_slow() (for netfilter) and ovs resp. act_ct for the drop. Plan is to allow conntrack/nat engine to return STOLEN verdict ("skb might have been free'd already"). Example change: @@ -52,10 +53,8 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, rt = skb_rtable(skb); nh = rt_nexthop(rt, ip_hdr(skb)->daddr); newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE); - if (!newsrc) { - pr_info("%s ate my IP address\n", out->name); - return NF_DROP; - } + if (!newsrc) + return NF_DROP_REASON(skb, SKB_DROP_REASON_NETFILTER_DROP, EADDRNOTAVAIL); Where NF_DROP_REASON() is: static __always_inline int NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err) { BUILD_BUG_ON(err > 0xffff); kfree_skb_reason(skb, reason); return ((err << 16) | NF_STOLEN); } So drop monitoring tools will blame nf_nat_masquerade.c:nf_nat_masquerade_ipv4 and not the consumer of the NF_DROP verdict. I can't make such changes ATM because ovs and act_ct assume conntrack returns only ACCEPT and DROP, so we'd get double-free. Hope that makes sense. Thanks!
Florian Westphal <fw@strlen.de> writes: > Aaron Conole <aconole@redhat.com> wrote: >> > verdict with NF_DROP_REASON() helper, >> > >> > This helper releases the skb instantly (so drop_monitor can pinpoint >> > precise location) and returns NF_STOLEN. >> > >> > Prepare call sites to deal with this before introducing such changes >> > in conntrack and nat core. >> > >> > Signed-off-by: Florian Westphal <fw@strlen.de> >> > --- >> >> AFAIU, these changes are only impacting the existing NF_DROP cases, and >> won't impact how ovs + netfilter communicate about invalid packets. One >> important thing to note is that we rely on: >> >> * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be >> * set to NULL and 0 will be returned. > > Right, this is about how to communicate 'packet dropped'. > > NF_DROP means 'please call kfree_skb for me'. Problem from introspection point > of view is that drop monitor will blame nf_hook_slow() (for netfilter) > and ovs resp. act_ct for the drop. > > Plan is to allow conntrack/nat engine to return STOLEN verdict ("skb > might have been free'd already"). > > Example change: > @@ -52,10 +53,8 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, > unsigned int hooknum, > rt = skb_rtable(skb); > nh = rt_nexthop(rt, ip_hdr(skb)->daddr); > newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE); > - if (!newsrc) { > - pr_info("%s ate my IP address\n", out->name); > - return NF_DROP; > - } > + if (!newsrc) > + return NF_DROP_REASON(skb, SKB_DROP_REASON_NETFILTER_DROP, > EADDRNOTAVAIL); > > > Where NF_DROP_REASON() is: > > static __always_inline int > NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err) > { > BUILD_BUG_ON(err > 0xffff); > > kfree_skb_reason(skb, reason); > > return ((err << 16) | NF_STOLEN); > } > > So drop monitoring tools will blame > nf_nat_masquerade.c:nf_nat_masquerade_ipv4 and not > the consumer of the NF_DROP verdict. 
> > I can't make such changes ATM because ovs and act_ct assume conntrack > returns only ACCEPT and DROP, so we'd get double-free. Hope that makes > sense. > > Thanks! Makes sense to me, thanks!
Florian Westphal <fw@strlen.de> writes: > At this time, conntrack either returns NF_ACCEPT or NF_DROP. > To improve debugging it would be nice to be able to replace NF_DROP > verdict with NF_DROP_REASON() helper. > > This helper releases the skb instantly (so drop_monitor can pinpoint > precise location) and returns NF_STOLEN. > > Prepare call sites to deal with this before introducing such changes > in conntrack and nat core. > > Signed-off-by: Florian Westphal <fw@strlen.de> > --- Reviewed-by: Aaron Conole <aconole@redhat.com>
Hello: This patch was applied to netdev/net-next.git (main) by David S. Miller <davem@davemloft.net>: On Wed, 3 Jul 2024 12:46:34 +0200 you wrote: > At this time, conntrack either returns NF_ACCEPT or NF_DROP. > To improve debugging it would be nice to be able to replace NF_DROP > verdict with NF_DROP_REASON() helper. > > This helper releases the skb instantly (so drop_monitor can pinpoint > precise location) and returns NF_STOLEN. > > [...] Here is the summary with links: - [net-next] openvswitch: prepare for stolen verdict coming from conntrack and nat engine https://git.kernel.org/netdev/net-next/c/c7f79f2620b7 You are awesome, thank you!
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3b980bf2770b..8eb1d644b741 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -679,6 +679,8 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, action |= BIT(NF_NAT_MANIP_DST); err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit); + if (err != NF_ACCEPT) + return err; if (action & BIT(NF_NAT_MANIP_SRC)) ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC); @@ -697,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } #endif +static int verdict_to_errno(unsigned int verdict) +{ + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + return 0; + case NF_DROP: + return -EINVAL; + case NF_STOLEN: + return -EINPROGRESS; + default: + break; + } + + return -EINVAL; +} + /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if * not done already. Update key with new CT state after passing the packet * through conntrack. @@ -735,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, err = nf_conntrack_in(skb, &state); if (err != NF_ACCEPT) - return -ENOENT; + return verdict_to_errno(err); /* Clear CT state NAT flags to mark that we have not yet done * NAT after the nf_conntrack_in() call. We can actually clear @@ -762,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * the key->ct_state. */ if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) && - (nf_ct_is_confirmed(ct) || info->commit) && - ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { - return -EINVAL; + (nf_ct_is_confirmed(ct) || info->commit)) { + int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo); + + err = verdict_to_errno(err); + if (err) + return err; } /* Userspace may decide to perform a ct lookup without a helper @@ -795,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * - When committing an unconfirmed connection. 
*/ if ((nf_ct_is_confirmed(ct) ? !cached || add_helper : - info->commit) && - nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) { - return -EINVAL; + info->commit)) { + int err = nf_ct_helper(skb, ct, ctinfo, info->family); + + err = verdict_to_errno(err); + if (err) + return err; } if (nf_ct_protonum(ct) == IPPROTO_TCP && @@ -1001,10 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, /* This will take care of sending queued events even if the connection * is already confirmed. */ - if (nf_conntrack_confirm(skb) != NF_ACCEPT) - return -EINVAL; + err = nf_conntrack_confirm(skb); - return 0; + return verdict_to_errno(err); } /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero @@ -1039,6 +1062,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, else err = ovs_ct_lookup(net, key, info, skb); + /* conntrack core returned NF_STOLEN */ + if (err == -EINPROGRESS) + return err; + skb_push_rcsum(skb, nh_ofs); if (err) ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
At this time, conntrack either returns NF_ACCEPT or NF_DROP. To improve debugging it would be nice to be able to replace NF_DROP verdict with NF_DROP_REASON() helper. This helper releases the skb instantly (so drop_monitor can pinpoint precise location) and returns NF_STOLEN. Prepare call sites to deal with this before introducing such changes in conntrack and nat core. Signed-off-by: Florian Westphal <fw@strlen.de> --- net/openvswitch/conntrack.c | 47 +++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 10 deletions(-)