@@ -114,6 +114,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
+ int ts_recent_stamp;
if (reuse == 2) {
/* Still does not detect *everything* that goes through
@@ -152,9 +153,10 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
If TW bucket has been already destroyed we fall back to VJ's scheme
and use initial timestamp retrieved from peer table.
*/
- if (tcptw->tw_ts_recent_stamp &&
+ ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp);
+ if (ts_recent_stamp &&
(!twp || (reuse && time_after32(ktime_get_seconds(),
- tcptw->tw_ts_recent_stamp)))) {
+ ts_recent_stamp)))) {
/* inet_twsk_hashdance() sets sk_refcnt after putting twsk
* and releasing the bucket lock.
*/
@@ -178,8 +180,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
if (!seq)
seq = 1;
WRITE_ONCE(tp->write_seq, seq);
- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ tp->rx_opt.ts_recent = READ_ONCE(tcptw->tw_ts_recent);
+ tp->rx_opt.ts_recent_stamp = ts_recent_stamp;
}
return 1;
@@ -1064,7 +1066,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
- tcptw->tw_ts_recent,
+ READ_ONCE(tcptw->tw_ts_recent),
tw->tw_bound_dev_if, &key,
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
tw->tw_tos,
@@ -101,16 +101,18 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
+ int ts_recent_stamp;
tmp_opt.saw_tstamp = 0;
- if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
+ ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp);
+ if (th->doff > (sizeof(*th) >> 2) && ts_recent_stamp) {
tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
if (tmp_opt.rcv_tsecr)
tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
- tmp_opt.ts_recent = tcptw->tw_ts_recent;
- tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ tmp_opt.ts_recent = READ_ONCE(tcptw->tw_ts_recent);
+ tmp_opt.ts_recent_stamp = ts_recent_stamp;
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
}
}
@@ -152,8 +154,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq);
if (tmp_opt.saw_tstamp) {
- tcptw->tw_ts_recent_stamp = ktime_get_seconds();
- tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
+ WRITE_ONCE(tcptw->tw_ts_recent_stamp,
+ ktime_get_seconds());
+ WRITE_ONCE(tcptw->tw_ts_recent,
+ tmp_opt.rcv_tsval);
}
inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
@@ -197,8 +201,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
}
if (tmp_opt.saw_tstamp) {
- tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
- tcptw->tw_ts_recent_stamp = ktime_get_seconds();
+ WRITE_ONCE(tcptw->tw_ts_recent,
+ tmp_opt.rcv_tsval);
+ WRITE_ONCE(tcptw->tw_ts_recent_stamp,
+ ktime_get_seconds());
}
inet_twsk_put(tw);
@@ -225,7 +231,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
if (th->syn && !th->rst && !th->ack && !paws_reject &&
(after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
(tmp_opt.saw_tstamp &&
- (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
+ (s32)(READ_ONCE(tcptw->tw_ts_recent) - tmp_opt.rcv_tsval) < 0))) {
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
if (isn == 0)
isn++;
@@ -1196,9 +1196,9 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
- tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key,
- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
- tw->tw_txhash);
+ READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
+ &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel),
+ tw->tw_priority, tw->tw_txhash);
#ifdef CONFIG_TCP_AO
out:
These fields can be read and written locklessly, add annotations around these minor races. Signed-off-by: Eric Dumazet <edumazet@google.com> --- net/ipv4/tcp_ipv4.c | 12 +++++++----- net/ipv4/tcp_minisocks.c | 22 ++++++++++++++-------- net/ipv6/tcp_ipv6.c | 6 +++--- 3 files changed, 24 insertions(+), 16 deletions(-)