diff mbox series

[v2,1/4] mptcp: fix warning in __skb_flow_dissect() when do syn cookie for subflow join

Message ID ed5254b2-baa9-ea4c-df45-c61fa00622b9@163.com (mailing list archive)
State Superseded, archived
Delegated to: Paolo Abeni
Headers show
Series Fix some mptcp syncookie process bugs | expand

Commit Message

Jianguo Wu June 10, 2021, 9:28 a.m. UTC
From: Jianguo Wu <wujianguo@chinatelecom.cn>

I got the following warning message while doing the test:

[   55.552626] TCP: request_sock_subflow: Possible SYN flooding on port 8099. Sending cookies.  Check SNMP counters.
[   55.553024] ------------[ cut here ]------------
[   55.553027] WARNING: CPU: 0 PID: 10 at net/core/flow_dissector.c:984 __skb_flow_dissect+0x280/0x1650
...
[   55.553117] CPU: 0 PID: 10 Comm: ksoftirqd/0 Not tainted 5.12.0+ #18
[   55.553121] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 02/27/2020
[   55.553124] RIP: 0010:__skb_flow_dissect+0x280/0x1650
...
[   55.553133] RSP: 0018:ffffb79580087770 EFLAGS: 00010246
[   55.553137] RAX: 0000000000000000 RBX: ffffffff8ddb58e0 RCX: ffffb79580087888
[   55.553139] RDX: ffffffff8ddb58e0 RSI: ffff8f7e4652b600 RDI: 0000000000000000
[   55.553141] RBP: ffffb79580087858 R08: 0000000000000000 R09: 0000000000000008
[   55.553143] R10: 000000008c622965 R11: 00000000d3313a5b R12: ffff8f7e4652b600
[   55.553146] R13: ffff8f7e465c9062 R14: 0000000000000000 R15: ffffb79580087888
[   55.553149] FS:  0000000000000000(0000) GS:ffff8f7f75e00000(0000) knlGS:0000000000000000
[   55.553152] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   55.553154] CR2: 00007f73d1d19000 CR3: 0000000135e10004 CR4: 00000000003706f0
[   55.553160] Call Trace:
[   55.553166]  ? __sha256_final+0x67/0xd0
[   55.553173]  ? sha256+0x7e/0xa0
[   55.553177]  __skb_get_hash+0x57/0x210
[   55.553182]  subflow_init_req_cookie_join_save+0xac/0xc0
[   55.553189]  subflow_check_req+0x474/0x550
[   55.553195]  ? ip_route_output_key_hash+0x67/0x90
[   55.553200]  ? xfrm_lookup_route+0x1d/0xa0
[   55.553207]  subflow_v4_route_req+0x8e/0xd0
[   55.553212]  tcp_conn_request+0x31e/0xab0
[   55.553218]  ? selinux_socket_sock_rcv_skb+0x116/0x210
[   55.553224]  ? tcp_rcv_state_process+0x179/0x6d0
[   55.553229]  tcp_rcv_state_process+0x179/0x6d0
[   55.553235]  tcp_v4_do_rcv+0xaf/0x220
[   55.553239]  tcp_v4_rcv+0xce4/0xd80
[   55.553243]  ? ip_route_input_rcu+0x246/0x260
[   55.553248]  ip_protocol_deliver_rcu+0x35/0x1b0
[   55.553253]  ip_local_deliver_finish+0x44/0x50
[   55.553258]  ip_local_deliver+0x6c/0x110
[   55.553262]  ? ip_rcv_finish_core.isra.19+0x5a/0x400
[   55.553267]  ip_rcv+0xd1/0xe0
...

After debugging, I found that in __skb_flow_dissect() skb->dev and skb->sk are both NULL,
so net is NULL and WARN_ON_ONCE(!net) is triggered. In fact, net is always NULL in this
code path, because skb->dev is set to NULL in tcp_v4_rcv() and skb->sk is never set.

Code snippet in __skb_flow_dissect() that triggers the warning:
  975         if (skb) {
  976                 if (!net) {
  977                         if (skb->dev)
  978                                 net = dev_net(skb->dev);
  979                         else if (skb->sk)
  980                                 net = sock_net(skb->sk);
  981                 }
  982         }
  983
  984         WARN_ON_ONCE(!net);

So, use a hash derived from the 4-tuple instead of skb_get_hash().

Fixes: 9466a1ccebbe ("mptcp: enable JOIN requests even if cookies are in use")
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
---
 net/mptcp/syncookies.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 4 deletions(-)

Comments

Paolo Abeni June 10, 2021, 10:19 a.m. UTC | #1
On Thu, 2021-06-10 at 17:28 +0800, Jianguo Wu wrote:
> From: Jianguo Wu <wujianguo@chinatelecom.cn>
> 
> I got the following warning message while doing the test:
> 
> [   55.552626] TCP: request_sock_subflow: Possible SYN flooding on port 8099. Sending cookies.  Check SNMP counters.
> [   55.553024] ------------[ cut here ]------------
> [   55.553027] WARNING: CPU: 0 PID: 10 at net/core/flow_dissector.c:984 __skb_flow_dissect+0x280/0x1650
> ...
> [   55.553117] CPU: 0 PID: 10 Comm: ksoftirqd/0 Not tainted 5.12.0+ #18
> [   55.553121] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 02/27/2020
> [   55.553124] RIP: 0010:__skb_flow_dissect+0x280/0x1650
> ...
> [   55.553133] RSP: 0018:ffffb79580087770 EFLAGS: 00010246
> [   55.553137] RAX: 0000000000000000 RBX: ffffffff8ddb58e0 RCX: ffffb79580087888
> [   55.553139] RDX: ffffffff8ddb58e0 RSI: ffff8f7e4652b600 RDI: 0000000000000000
> [   55.553141] RBP: ffffb79580087858 R08: 0000000000000000 R09: 0000000000000008
> [   55.553143] R10: 000000008c622965 R11: 00000000d3313a5b R12: ffff8f7e4652b600
> [   55.553146] R13: ffff8f7e465c9062 R14: 0000000000000000 R15: ffffb79580087888
> [   55.553149] FS:  0000000000000000(0000) GS:ffff8f7f75e00000(0000) knlGS:0000000000000000
> [   55.553152] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   55.553154] CR2: 00007f73d1d19000 CR3: 0000000135e10004 CR4: 00000000003706f0
> [   55.553160] Call Trace:
> [   55.553166]  ? __sha256_final+0x67/0xd0
> [   55.553173]  ? sha256+0x7e/0xa0
> [   55.553177]  __skb_get_hash+0x57/0x210
> [   55.553182]  subflow_init_req_cookie_join_save+0xac/0xc0
> [   55.553189]  subflow_check_req+0x474/0x550
> [   55.553195]  ? ip_route_output_key_hash+0x67/0x90
> [   55.553200]  ? xfrm_lookup_route+0x1d/0xa0
> [   55.553207]  subflow_v4_route_req+0x8e/0xd0
> [   55.553212]  tcp_conn_request+0x31e/0xab0
> [   55.553218]  ? selinux_socket_sock_rcv_skb+0x116/0x210
> [   55.553224]  ? tcp_rcv_state_process+0x179/0x6d0
> [   55.553229]  tcp_rcv_state_process+0x179/0x6d0
> [   55.553235]  tcp_v4_do_rcv+0xaf/0x220
> [   55.553239]  tcp_v4_rcv+0xce4/0xd80
> [   55.553243]  ? ip_route_input_rcu+0x246/0x260
> [   55.553248]  ip_protocol_deliver_rcu+0x35/0x1b0
> [   55.553253]  ip_local_deliver_finish+0x44/0x50
> [   55.553258]  ip_local_deliver+0x6c/0x110
> [   55.553262]  ? ip_rcv_finish_core.isra.19+0x5a/0x400
> [   55.553267]  ip_rcv+0xd1/0xe0
> ...
> 
> After debugging, I found in __skb_flow_dissect(), skb->dev and skb->sk are both NULL,
> then net is NULL, and trigger WARN_ON_ONCE(!net), actually net is always NULL in this
> code path, as skb->dev is set to NULL in tcp_v4_rcv(), and skb->sk is never set.
> 
> Code snippet in __skb_flow_dissect() that trigger warning:
>   975         if (skb) {
>   976                 if (!net) {
>   977                         if (skb->dev)
>   978                                 net = dev_net(skb->dev);
>   979                         else if (skb->sk)
>   980                                 net = sock_net(skb->sk);
>   981                 }
>   982         }
>   983
>   984         WARN_ON_ONCE(!net);
> 
> So, use 4-tuple derived hash.
> 
> Fixes: 9466a1ccebbe("mptcp: enable JOIN requests even if cookies are in use").
> Suggested-by: Paolo Abeni <pabeni@redhat.com>
> Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
> ---
>  net/mptcp/syncookies.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 53 insertions(+), 4 deletions(-)
> 
> diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c
> index abe0fd0..11721b3 100644
> --- a/net/mptcp/syncookies.c
> +++ b/net/mptcp/syncookies.c
> @@ -35,13 +35,62 @@ struct join_entry {
>  static struct join_entry join_entries[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp;
>  static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp;
> 
> -static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net)
> +static u32 mptcp_join_hashfn(const struct net *net, const __be32 laddr,
> +			     const __be16 lport, const __be32 faddr,
> +			     const __be16 fport)
>  {
> -	u32 i = skb_get_hash(skb) ^ net_hash_mix(net);
> +	static u32 mptcp_join_hash_secret __read_mostly;
> +	u32 i;
> +
> +	net_get_random_once(&mptcp_join_hash_secret,
> +			    sizeof(mptcp_join_hash_secret));
> +
> +	i = jhash_3words((__force __u32) laddr,
> +			 (__force __u32) faddr,
> +			 ((__u32) lport) << 16 | (__force __u32)fport,
> +			 mptcp_join_hash_secret + net_hash_mix(net));
> +
> +	return i % ARRAY_SIZE(join_entries);
> +}
> +
> +static u32 mptcp_join_hashfn_inet6(const struct net *net,
> +				   const struct in6_addr *laddr, const u16 lport,
> +				   const struct in6_addr *faddr, const __be16 fport)
> +{
> +        static u32 mptcp_join_hash_secret_v6 __read_mostly;
> +        static u32 ipv6_hash_secret __read_mostly;
> +        u32 lhash, fhash, ports, i;
> +
> +        net_get_random_once(&mptcp_join_hash_secret_v6,
> +			    sizeof(mptcp_join_hash_secret_v6));
> +        net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
> +
> +        lhash = (__force u32)laddr->s6_addr32[3];
> +        fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
> +	ports = (((u32)lport) << 16) | (__force u32)fport;
> +
> +	i = jhash_3words(lhash, fhash, ports,
> +			 mptcp_join_hash_secret_v6 + net_hash_mix(net));

The above code uses spaces instead of tabs. More importantly, you can
directly use inet6_ehashfn(), since that function is already visible.

I'm unsure whether we could directly use inet_ehashfn() here: it would
require making that function visible, and could affect TCP performance
(in a very minor way), as the compiler may refuse to inline the
function once it is no longer 'static'.

@Florian, @Mat, WDYT?!?

/P
Florian Westphal June 11, 2021, 1:07 p.m. UTC | #2
Paolo Abeni <pabeni@redhat.com> wrote:
> On Thu, 2021-06-10 at 17:28 +0800, Jianguo Wu wrote:
> > From: Jianguo Wu <wujianguo@chinatelecom.cn>
> > 
> > I got the following warning message while doing the test:
> > 
> The above codes uses spaces instead of tabs. More importantly you can
> directly use inet6_ehashfn(), since such function is already visible.
> 
> I'm unsure if we could directly use inet_ehashfn() here: it will
> require making such function visible, and could affect TCP performances
> (in a very minor way) as the compiler may refuse to inline such
> function once that is not 'static'
> 
> @Florian, @Mat, WDYT?!?

I think this is all massive over-engineering.
Probably it's enough to do

  th->seq % ARRAY_SIZE(join_entries);

... without any hashing.

I'd suggest going for

jhash_3words(th->seq, net_hash_mix(net), th->sport << 16| th->dport, &secret)

and ignore network headers altogether.
diff mbox series

Patch

diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c
index abe0fd0..11721b3 100644
--- a/net/mptcp/syncookies.c
+++ b/net/mptcp/syncookies.c
@@ -35,13 +35,62 @@  struct join_entry {
 static struct join_entry join_entries[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp;
 static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp;

-static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net)
+static u32 mptcp_join_hashfn(const struct net *net, const __be32 laddr,
+			     const __be16 lport, const __be32 faddr,
+			     const __be16 fport)
 {
-	u32 i = skb_get_hash(skb) ^ net_hash_mix(net);
+	static u32 mptcp_join_hash_secret __read_mostly;
+	u32 i;
+
+	net_get_random_once(&mptcp_join_hash_secret,
+			    sizeof(mptcp_join_hash_secret));
+
+	i = jhash_3words((__force __u32) laddr,
+			 (__force __u32) faddr,
+			 ((__u32) lport) << 16 | (__force __u32)fport,
+			 mptcp_join_hash_secret + net_hash_mix(net));
+
+	return i % ARRAY_SIZE(join_entries);
+}
+
+static u32 mptcp_join_hashfn_inet6(const struct net *net,
+				   const struct in6_addr *laddr, const u16 lport,
+				   const struct in6_addr *faddr, const __be16 fport)
+{
+        static u32 mptcp_join_hash_secret_v6 __read_mostly;
+        static u32 ipv6_hash_secret __read_mostly;
+        u32 lhash, fhash, ports, i;
+
+        net_get_random_once(&mptcp_join_hash_secret_v6,
+			    sizeof(mptcp_join_hash_secret_v6));
+        net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+
+        lhash = (__force u32)laddr->s6_addr32[3];
+        fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+	ports = (((u32)lport) << 16) | (__force u32)fport;
+
+	i = jhash_3words(lhash, fhash, ports,
+			 mptcp_join_hash_secret_v6 + net_hash_mix(net));

 	return i % ARRAY_SIZE(join_entries);
 }

+static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net,
+				 unsigned short family)
+{
+	struct tcphdr *th = tcp_hdr(skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (family == AF_INET6 &&
+	    !ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr))
+		return mptcp_join_hashfn_inet6(net,
+					       &ipv6_hdr(skb)->daddr, th->dest,
+					       &ipv6_hdr(skb)->saddr, th->source);
+#endif
+	return mptcp_join_hashfn(net, ip_hdr(skb)->daddr, th->dest,
+				 ip_hdr(skb)->saddr, th->source);
+}
+
 static void mptcp_join_store_state(struct join_entry *entry,
 				   const struct mptcp_subflow_request_sock *subflow_req)
 {
@@ -58,7 +107,7 @@  void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *
 				       struct sk_buff *skb)
 {
 	struct net *net = read_pnet(&subflow_req->sk.req.ireq_net);
-	u32 i = mptcp_join_entry_hash(skb, net);
+	u32 i = mptcp_join_entry_hash(skb, net, subflow_req->sk.req.ireq_family);

 	/* No use in waiting if other cpu is already using this slot --
 	 * would overwrite the data that got stored.
@@ -79,7 +128,7 @@  bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
 					struct sk_buff *skb)
 {
 	struct net *net = read_pnet(&subflow_req->sk.req.ireq_net);
-	u32 i = mptcp_join_entry_hash(skb, net);
+	u32 i = mptcp_join_entry_hash(skb, net, subflow_req->sk.req.ireq_family);
 	struct mptcp_sock *msk;
 	struct join_entry *e;