diff mbox series

[net,1/1] net/sched: act_ct: Fix flow table lookup failure with no originating ifindex

Message ID 20220217093424.23601-1-paulb@nvidia.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net,1/1] net/sched: act_ct: Fix flow table lookup failure with no originating ifindex | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 92 this patch: 92
netdev/cc_maintainers warning 2 maintainers not CCed: fw@strlen.de jiri@resnulli.us
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes fail Problems with Fixes tag: 1
netdev/build_allmodconfig_warn success Errors and warnings before: 91 this patch: 91
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Paul Blakey Feb. 17, 2022, 9:34 a.m. UTC
After cited commit optimized hw insertion, flow table entries are
populated with ifindex information which was intended to only be used
for HW offload. This tuple ifindex is hashed in the flow table key, so
it must be filled for lookup to be successful. But tuple ifindex is only
relevant for the netfilter flowtables (nft), so it's not filled in
act_ct flow table lookup, resulting in lookup failure, and no SW
offload and no offload teardown for TCP connection FIN/RST packets.

To fix this, allow flow tables that don't hash the ifindex.
Netfilter flow tables will keep using ifindex for a more specific
offload, while act_ct will not.

Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tupledx")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
---
 include/net/netfilter/nf_flow_table.h | 8 ++++----
 net/netfilter/nf_flow_table_core.c    | 6 ++++++
 net/sched/act_ct.c                    | 3 ++-
 3 files changed, 12 insertions(+), 5 deletions(-)

Comments

Pablo Neira Ayuso Feb. 17, 2022, 1:55 p.m. UTC | #1
On Thu, Feb 17, 2022 at 11:34:24AM +0200, Paul Blakey wrote:
> After cited commit optimizted hw insertion, flow table entries are
> populated with ifindex information which was intended to only be used
> for HW offload. This tuple ifindex is hashed in the flow table key, so
> it must be filled for lookup to be successful. But tuple ifindex is only
> relevant for the netfilter flowtables (nft), so it's not filled in
> act_ct flow table lookup, resulting in lookup failure, and no SW
> offload and no offload teardown for TCP connection FIN/RST packets.
> 
> To fix this, allow flow tables that don't hash the ifindex.
> Netfilter flow tables will keep using ifindex for a more specific
> offload, while act_ct will not.

Using iif == zero should be enough to specify not set?

> Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tupledx")
> Signed-off-by: Paul Blakey <paulb@nvidia.com>
> ---
>  include/net/netfilter/nf_flow_table.h | 8 ++++----
>  net/netfilter/nf_flow_table_core.c    | 6 ++++++
>  net/sched/act_ct.c                    | 3 ++-
>  3 files changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
> index a3647fadf1cc..9b474414a936 100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -64,8 +64,9 @@ struct nf_flowtable_type {
>  };
>  
>  enum nf_flowtable_flags {
> -	NF_FLOWTABLE_HW_OFFLOAD		= 0x1,	/* NFT_FLOWTABLE_HW_OFFLOAD */
> -	NF_FLOWTABLE_COUNTER		= 0x2,	/* NFT_FLOWTABLE_COUNTER */
> +	NF_FLOWTABLE_HW_OFFLOAD			= 0x1,	/* NFT_FLOWTABLE_HW_OFFLOAD */
> +	NF_FLOWTABLE_COUNTER			= 0x2,	/* NFT_FLOWTABLE_COUNTER */
> +	NF_FLOWTABLE_NO_IFINDEX_FILTERING	= 0x4,	/* Only used by act_ct */
>  };
>  
>  struct nf_flowtable {
> @@ -114,8 +115,6 @@ struct flow_offload_tuple {
>  		__be16			dst_port;
>  	};
>  
> -	int				iifidx;
> -
>  	u8				l3proto;
>  	u8				l4proto;
>  	struct {
> @@ -126,6 +125,7 @@ struct flow_offload_tuple {
>  	/* All members above are keys for lookups, see flow_offload_hash(). */
>  	struct { }			__hash;
>  
> +	int				iifidx;
>  	u8				dir:2,
>  					xmit_type:2,
>  					encap_num:2,
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index b90eca7a2f22..f0cb2c7075c0 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -254,9 +254,15 @@ static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
>  static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
>  					const void *ptr)
>  {
> +	const struct nf_flowtable *flow_table = container_of(arg->ht, struct nf_flowtable,
> +							     rhashtable);
>  	const struct flow_offload_tuple *tuple = arg->key;
>  	const struct flow_offload_tuple_rhash *x = ptr;
>  
> +	if (!(flow_table->flags & NF_FLOWTABLE_NO_IFINDEX_FILTERING) &&
> +	    x->tuple.iifidx != tuple->iifidx)
> +		return 1;
> +
>  	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
>  		return 1;
>  
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index f99247fc6468..22cd32ec9889 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -305,7 +305,8 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>  
>  	ct_ft->nf_ft.type = &flowtable_ct;
>  	ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD |
> -			      NF_FLOWTABLE_COUNTER;
> +			      NF_FLOWTABLE_COUNTER |
> +			      NF_FLOWTABLE_NO_IFINDEX_FILTERING;
>  	err = nf_flow_table_init(&ct_ft->nf_ft);
>  	if (err)
>  		goto err_init;
> -- 
> 2.30.1
>
Jakub Kicinski Feb. 17, 2022, 4:55 p.m. UTC | #2
On Thu, 17 Feb 2022 11:34:24 +0200 Paul Blakey wrote:
> Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tupledx")

Fixes tag: Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tupledx")
Has these problem(s):
	- Subject does not match target commit subject
	  Just use
		git log -1 --format='Fixes: %h ("%s")'
Marcelo Ricardo Leitner Feb. 17, 2022, 11:16 p.m. UTC | #3
On Thu, Feb 17, 2022 at 11:34:24AM +0200, Paul Blakey wrote:
> After cited commit optimizted hw insertion, flow table entries are
> populated with ifindex information which was intended to only be used
> for HW offload. This tuple ifindex is hashed in the flow table key, so
> it must be filled for lookup to be successful. But tuple ifindex is only
> relevant for the netfilter flowtables (nft), so it's not filled in
> act_ct flow table lookup, resulting in lookup failure, and no SW
> offload and no offload teardown for TCP connection FIN/RST packets.
> 
> To fix this, allow flow tables that don't hash the ifindex.
> Netfilter flow tables will keep using ifindex for a more specific
> offload, while act_ct will not.
> 
> Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tupledx")

The fixes tag got corrupted. It should have been:
Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tuple iifidx")

Not sure if it needs a respin or not, but:
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

> Signed-off-by: Paul Blakey <paulb@nvidia.com>
> ---
Marcelo Ricardo Leitner Feb. 17, 2022, 11:27 p.m. UTC | #4
On Thu, Feb 17, 2022 at 02:55:27PM +0100, Pablo Neira Ayuso wrote:
> On Thu, Feb 17, 2022 at 11:34:24AM +0200, Paul Blakey wrote:
> > After cited commit optimizted hw insertion, flow table entries are
> > populated with ifindex information which was intended to only be used
> > for HW offload. This tuple ifindex is hashed in the flow table key, so
> > it must be filled for lookup to be successful. But tuple ifindex is only
> > relevant for the netfilter flowtables (nft), so it's not filled in
> > act_ct flow table lookup, resulting in lookup failure, and no SW
> > offload and no offload teardown for TCP connection FIN/RST packets.
> > 
> > To fix this, allow flow tables that don't hash the ifindex.
> > Netfilter flow tables will keep using ifindex for a more specific
> > offload, while act_ct will not.
> 
> Using iif == zero should be enough to specify not set?

You mean, when searching, if search input iif == zero, to simply not
check it? That seems dangerous somehow.
Pablo Neira Ayuso Feb. 17, 2022, 11:55 p.m. UTC | #5
On Thu, Feb 17, 2022 at 08:27:08PM -0300, Marcelo Ricardo Leitner wrote:
> On Thu, Feb 17, 2022 at 02:55:27PM +0100, Pablo Neira Ayuso wrote:
> > On Thu, Feb 17, 2022 at 11:34:24AM +0200, Paul Blakey wrote:
> > > After cited commit optimizted hw insertion, flow table entries are
> > > populated with ifindex information which was intended to only be used
> > > for HW offload. This tuple ifindex is hashed in the flow table key, so
> > > it must be filled for lookup to be successful. But tuple ifindex is only
> > > relevant for the netfilter flowtables (nft), so it's not filled in
> > > act_ct flow table lookup, resulting in lookup failure, and no SW
> > > offload and no offload teardown for TCP connection FIN/RST packets.
> > > 
> > > To fix this, allow flow tables that don't hash the ifindex.
> > > Netfilter flow tables will keep using ifindex for a more specific
> > > offload, while act_ct will not.
> > 
> > Using iif == zero should be enough to specify not set?
> 
> You mean, when searching, if search input iif == zero, to simply not
> check it? That seems dangerous somehow.

dev_new_index() does not allocate ifindex as zero.

Anyway, @Paul: could you instead add a tc_ifidx field in the union
right after __hash to fix 9795ded7f924?

Thanks.
Pablo Neira Ayuso Feb. 18, 2022, 12:05 a.m. UTC | #6
On Fri, Feb 18, 2022 at 12:55:07AM +0100, Pablo Neira Ayuso wrote:
> On Thu, Feb 17, 2022 at 08:27:08PM -0300, Marcelo Ricardo Leitner wrote:
> > On Thu, Feb 17, 2022 at 02:55:27PM +0100, Pablo Neira Ayuso wrote:
> > > On Thu, Feb 17, 2022 at 11:34:24AM +0200, Paul Blakey wrote:
> > > > After cited commit optimizted hw insertion, flow table entries are
> > > > populated with ifindex information which was intended to only be used
> > > > for HW offload. This tuple ifindex is hashed in the flow table key, so
> > > > it must be filled for lookup to be successful. But tuple ifindex is only
> > > > relevant for the netfilter flowtables (nft), so it's not filled in
> > > > act_ct flow table lookup, resulting in lookup failure, and no SW
> > > > offload and no offload teardown for TCP connection FIN/RST packets.
> > > > 
> > > > To fix this, allow flow tables that don't hash the ifindex.
> > > > Netfilter flow tables will keep using ifindex for a more specific
> > > > offload, while act_ct will not.
> > > 
> > > Using iif == zero should be enough to specify not set?
> > 
> > You mean, when searching, if search input iif == zero, to simply not
> > check it? That seems dangerous somehow.
> 
> dev_new_index() does not allocate ifindex as zero.
> 
> Anyway, @Paul: could you add a tc_ifidx field instead in the union
> right after __hash instead to fix 9795ded7f924?

I mean this incomplete patch below:

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index a3647fadf1cc..d4fa4f716f68 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -142,6 +142,7 @@ struct flow_offload_tuple {
                        u8              h_source[ETH_ALEN];
                        u8              h_dest[ETH_ALEN];
                } out;
+               u32                     tc_ifidx;
        };
 };

You will need to update nf_flow_rule_match() so that it sets
key->meta.ingress_ifindex from tc_ifidx when tc_ifidx is non-zero.
Paul Blakey Feb. 20, 2022, 8:48 a.m. UTC | #7
On Fri, 18 Feb 2022, Pablo Neira Ayuso wrote:

> On Fri, Feb 18, 2022 at 12:55:07AM +0100, Pablo Neira Ayuso wrote:
> > On Thu, Feb 17, 2022 at 08:27:08PM -0300, Marcelo Ricardo Leitner wrote:
> > > On Thu, Feb 17, 2022 at 02:55:27PM +0100, Pablo Neira Ayuso wrote:
> > > > On Thu, Feb 17, 2022 at 11:34:24AM +0200, Paul Blakey wrote:
> > > > > After cited commit optimizted hw insertion, flow table entries are
> > > > > populated with ifindex information which was intended to only be used
> > > > > for HW offload. This tuple ifindex is hashed in the flow table key, so
> > > > > it must be filled for lookup to be successful. But tuple ifindex is only
> > > > > relevant for the netfilter flowtables (nft), so it's not filled in
> > > > > act_ct flow table lookup, resulting in lookup failure, and no SW
> > > > > offload and no offload teardown for TCP connection FIN/RST packets.
> > > > > 
> > > > > To fix this, allow flow tables that don't hash the ifindex.
> > > > > Netfilter flow tables will keep using ifindex for a more specific
> > > > > offload, while act_ct will not.
> > > > 
> > > > Using iif == zero should be enough to specify not set?
> > > 
> > > You mean, when searching, if search input iif == zero, to simply not
> > > check it? That seems dangerous somehow.
> > 
> > dev_new_index() does not allocate ifindex as zero.
> > 
> > Anyway, @Paul: could you add a tc_ifidx field instead in the union
> > right after __hash instead to fix 9795ded7f924?
> 
> I mean this incomplete patch below:
> 
> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
> index a3647fadf1cc..d4fa4f716f68 100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -142,6 +142,7 @@ struct flow_offload_tuple {
>                         u8              h_source[ETH_ALEN];
>                         u8              h_dest[ETH_ALEN];
>                 } out;
> +               u32                     tc_ifidx;
>         };
>  };
> 
> You will need to update nf_flow_rule_match() to set key->meta.ingress_ifindex to
> use tc_ifidx if it is set to non-zero value.
> 

I understand how it could fix the original issue, but I don't think this
is better, because it makes the tuple less generic. What you suggested with
using 0 to avoid needing the new flag is good enough for me, and is
cleaner in my opinion.

I'll send the == 0 one as V2 in case you agree, and if you want to
change to this, I won't mind sending it as V3.
diff mbox series

Patch

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index a3647fadf1cc..9b474414a936 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -64,8 +64,9 @@  struct nf_flowtable_type {
 };
 
 enum nf_flowtable_flags {
-	NF_FLOWTABLE_HW_OFFLOAD		= 0x1,	/* NFT_FLOWTABLE_HW_OFFLOAD */
-	NF_FLOWTABLE_COUNTER		= 0x2,	/* NFT_FLOWTABLE_COUNTER */
+	NF_FLOWTABLE_HW_OFFLOAD			= 0x1,	/* NFT_FLOWTABLE_HW_OFFLOAD */
+	NF_FLOWTABLE_COUNTER			= 0x2,	/* NFT_FLOWTABLE_COUNTER */
+	NF_FLOWTABLE_NO_IFINDEX_FILTERING	= 0x4,	/* Only used by act_ct */
 };
 
 struct nf_flowtable {
@@ -114,8 +115,6 @@  struct flow_offload_tuple {
 		__be16			dst_port;
 	};
 
-	int				iifidx;
-
 	u8				l3proto;
 	u8				l4proto;
 	struct {
@@ -126,6 +125,7 @@  struct flow_offload_tuple {
 	/* All members above are keys for lookups, see flow_offload_hash(). */
 	struct { }			__hash;
 
+	int				iifidx;
 	u8				dir:2,
 					xmit_type:2,
 					encap_num:2,
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index b90eca7a2f22..f0cb2c7075c0 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -254,9 +254,15 @@  static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
 					const void *ptr)
 {
+	const struct nf_flowtable *flow_table = container_of(arg->ht, struct nf_flowtable,
+							     rhashtable);
 	const struct flow_offload_tuple *tuple = arg->key;
 	const struct flow_offload_tuple_rhash *x = ptr;
 
+	if (!(flow_table->flags & NF_FLOWTABLE_NO_IFINDEX_FILTERING) &&
+	    x->tuple.iifidx != tuple->iifidx)
+		return 1;
+
 	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
 		return 1;
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f99247fc6468..22cd32ec9889 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -305,7 +305,8 @@  static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
 
 	ct_ft->nf_ft.type = &flowtable_ct;
 	ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD |
-			      NF_FLOWTABLE_COUNTER;
+			      NF_FLOWTABLE_COUNTER |
+			      NF_FLOWTABLE_NO_IFINDEX_FILTERING;
 	err = nf_flow_table_init(&ct_ft->nf_ft);
 	if (err)
 		goto err_init;