Message ID | 20231124002720.102537-7-dima@arista.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [v2,1/7] Documentation/tcp: Fix an obvious typo | expand |
On Fri, 2023-11-24 at 00:27 +0000, Dmitry Safonov wrote: > RFC 5925 (6.2): > > TCP-AO emulates a 64-bit sequence number space by inferring when to > > increment the high-order 32-bit portion (the SNE) based on > > transitions in the low-order portion (the TCP sequence number). > > snd_sne and rcv_sne are the upper 4 bytes of extended SEQ number. > Unfortunately, reading two 4-bytes pointers can't be performed > atomically (without synchronization). > > Let's keep it KISS and add an rwlock - that shouldn't create much > contention as SNE are updated every 4Gb of traffic and the atomic region > is quite small. > > Fixes: 64382c71a557 ("net/tcp: Add TCP-AO SNE support") > Signed-off-by: Dmitry Safonov <dima@arista.com> > --- > include/net/tcp_ao.h | 2 +- > net/ipv4/tcp_ao.c | 34 +++++++++++++++++++++------------- > net/ipv4/tcp_input.c | 16 ++++++++++++++-- > 3 files changed, 36 insertions(+), 16 deletions(-) > > diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h > index 647781080613..beea3e6b39e2 100644 > --- a/include/net/tcp_ao.h > +++ b/include/net/tcp_ao.h > @@ -123,6 +123,7 @@ struct tcp_ao_info { > */ > u32 snd_sne; > u32 rcv_sne; > + rwlock_t sne_lock; RW lock are problematic in the networking code, see commit dbca1596bbb08318f5e3b3b99f8ca0a0d3830a65. I think you can use a plain spinlock here, as both read and write appears to be in the fastpath (?!?) > @@ -781,8 +780,10 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, > *traffic_key = snd_other_key(*key); > rnext_key = READ_ONCE(ao_info->rnext_key); > *keyid = rnext_key->rcvid; > - *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne), > - snd_basis, seq); > + read_lock_irqsave(&ao_info->sne_lock, flags); > + *sne = tcp_ao_compute_sne(ao_info->snd_sne, > + READ_ONCE(*snd_basis), seq); > + read_unlock_irqrestore(&ao_info->sne_lock, flags); Why are you using the irqsave variant? bh should suffice. Cheers, Paolo
On 11/27/23 11:41, Paolo Abeni wrote: > On Fri, 2023-11-24 at 00:27 +0000, Dmitry Safonov wrote: >> RFC 5925 (6.2): >>> TCP-AO emulates a 64-bit sequence number space by inferring when to >>> increment the high-order 32-bit portion (the SNE) based on >>> transitions in the low-order portion (the TCP sequence number). >> >> snd_sne and rcv_sne are the upper 4 bytes of extended SEQ number. >> Unfortunately, reading two 4-bytes pointers can't be performed >> atomically (without synchronization). >> >> Let's keep it KISS and add an rwlock - that shouldn't create much >> contention as SNE are updated every 4Gb of traffic and the atomic region >> is quite small. >> >> Fixes: 64382c71a557 ("net/tcp: Add TCP-AO SNE support") >> Signed-off-by: Dmitry Safonov <dima@arista.com> >> --- >> include/net/tcp_ao.h | 2 +- >> net/ipv4/tcp_ao.c | 34 +++++++++++++++++++++------------- >> net/ipv4/tcp_input.c | 16 ++++++++++++++-- >> 3 files changed, 36 insertions(+), 16 deletions(-) >> >> diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h >> index 647781080613..beea3e6b39e2 100644 >> --- a/include/net/tcp_ao.h >> +++ b/include/net/tcp_ao.h >> @@ -123,6 +123,7 @@ struct tcp_ao_info { >> */ >> u32 snd_sne; >> u32 rcv_sne; >> + rwlock_t sne_lock; > > RW lock are problematic in the networking code, see commit > dbca1596bbb08318f5e3b3b99f8ca0a0d3830a65. Thanks, was not aware of this pitfall. > I think you can use a plain spinlock here, as both read and write > appears to be in the fastpath (?!?) Yeah, I wanted to avoid to RX concurrency here as writing happens only once in 4Gb. I'll take another attempt to prevent that in v3. >> @@ -781,8 +780,10 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, >> *traffic_key = snd_other_key(*key); >> rnext_key = READ_ONCE(ao_info->rnext_key); >> *keyid = rnext_key->rcvid; >> - *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne), >> - snd_basis, seq); >> + read_lock_irqsave(&ao_info->sne_lock, flags); >> + *sne = tcp_ao_compute_sne(ao_info->snd_sne, >> + READ_ONCE(*snd_basis), seq); >> + read_unlock_irqrestore(&ao_info->sne_lock, flags); > > Why are you using the irqsave variant? bh should suffice. It should, yes :) > > Cheers, > > Paolo > Thanks, Dmitry
diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 647781080613..beea3e6b39e2 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -123,6 +123,7 @@ struct tcp_ao_info { */ u32 snd_sne; u32 rcv_sne; + rwlock_t sne_lock; refcount_t refcnt; /* Protects twsk destruction */ struct rcu_head rcu; }; @@ -212,7 +213,6 @@ enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, unsigned short int family, const struct request_sock *req, int l3index, const struct tcp_ao_hdr *aoh); -u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq); struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index, const union tcp_ao_addr *addr, int family, int sndid, int rcvid); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 2d000e275ce7..74db80aeeef3 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -230,6 +230,7 @@ static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags) return NULL; INIT_HLIST_HEAD(&ao->head); refcount_set(&ao->refcnt, 1); + rwlock_init(&ao->sne_lock); return ao; } @@ -472,10 +473,8 @@ static int tcp_ao_hash_pseudoheader(unsigned short int family, return -EAFNOSUPPORT; } -u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq) +static u32 tcp_ao_compute_sne(u32 sne, u32 next_seq, u32 seq) { - u32 sne = next_sne; - if (before(seq, next_seq)) { if (seq > next_seq) sne--; @@ -483,7 +482,6 @@ u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq) if (seq < next_seq) sne++; } - return sne; } @@ -763,14 +761,15 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, *keyid = (*key)->rcvid; } else { struct tcp_ao_key *rnext_key; - u32 snd_basis; + const u32 *snd_basis; + unsigned long flags; if (sk->sk_state == TCP_TIME_WAIT) { ao_info = rcu_dereference(tcp_twsk(sk)->ao_info); - snd_basis = tcp_twsk(sk)->tw_snd_nxt; + snd_basis = &tcp_twsk(sk)->tw_snd_nxt; } else { ao_info = rcu_dereference(tcp_sk(sk)->ao_info); - snd_basis = tcp_sk(sk)->snd_una; + snd_basis = &tcp_sk(sk)->snd_una; } if (!ao_info) return -ENOENT; @@ -781,8 +780,10 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, *traffic_key = snd_other_key(*key); rnext_key = READ_ONCE(ao_info->rnext_key); *keyid = rnext_key->rcvid; - *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne), - snd_basis, seq); + read_lock_irqsave(&ao_info->sne_lock, flags); + *sne = tcp_ao_compute_sne(ao_info->snd_sne, + READ_ONCE(*snd_basis), seq); + read_unlock_irqrestore(&ao_info->sne_lock, flags); } return 0; } @@ -795,6 +796,7 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct tcp_ao_info *ao; void *tkey_buf = NULL; + unsigned long flags; u8 *traffic_key; u32 sne; @@ -816,8 +818,10 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, tp->af_specific->ao_calc_key_sk(key, traffic_key, sk, ao->lisn, disn, true); } - sne = tcp_ao_compute_sne(READ_ONCE(ao->snd_sne), READ_ONCE(tp->snd_una), - ntohl(th->seq)); + read_lock_irqsave(&ao->sne_lock, flags); + sne = tcp_ao_compute_sne(ao->snd_sne, + READ_ONCE(tp->snd_una), ntohl(th->seq)); + read_unlock_irqrestore(&ao->sne_lock, flags); tp->af_specific->calc_ao_hash(hash_location, key, sk, skb, traffic_key, hash_location - (u8 *)th, sne); kfree(tkey_buf); @@ -938,8 +942,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, /* Fast-path */ if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) { - enum skb_drop_reason err; struct tcp_ao_key *current_key; + enum skb_drop_reason err; + unsigned long flags; /* Check if this socket's rnext_key matches the keyid in the * packet. If not we lookup the key based on the keyid @@ -956,8 +961,11 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, if (unlikely(th->syn && !th->ack)) goto verify_hash; - sne = tcp_ao_compute_sne(info->rcv_sne, tcp_sk(sk)->rcv_nxt, + read_lock_irqsave(&info->sne_lock, flags); + sne = tcp_ao_compute_sne(info->rcv_sne, + READ_ONCE(tcp_sk(sk)->rcv_nxt), ntohl(th->seq)); + read_unlock_irqrestore(&info->sne_lock, flags); /* Established socket, traffic key are cached */ traffic_key = rcv_other_key(key); err = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bcb55d98004c..fc3c27ce2b73 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3582,8 +3582,14 @@ static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack) ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); - if (ao && ack < tp->snd_una) + if (ao && ack < tp->snd_una) { + unsigned long flags; + + write_lock_irqsave(&ao->sne_lock, flags); ao->snd_sne++; + tp->snd_una = ack; + write_unlock_irqrestore(&ao->sne_lock, flags); + } #endif } @@ -3608,8 +3614,14 @@ static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); - if (ao && seq < tp->rcv_nxt) + if (ao && seq < tp->rcv_nxt) { + unsigned long flags; + + write_lock_irqsave(&ao->sne_lock, flags); ao->rcv_sne++; + WRITE_ONCE(tp->rcv_nxt, seq); + write_unlock_irqrestore(&ao->sne_lock, flags); + } #endif }