Message ID | 20201201144418.35045-5-kuniyu@amazon.co.jp (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | Socket migration for SO_REUSEPORT. | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/subject_prefix | success | Link |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 4 this patch: 4 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | warning | WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 4 this patch: 4 |
netdev/header_inline | success | Link |
netdev/stable | success | Stable not CCed |
On 12/1/20 3:44 PM, Kuniyuki Iwashima wrote: > A TFO request socket is only freed after BOTH 3WHS has completed (or > aborted) and the child socket has been accepted (or its listener has been > closed). Hence, depending on the order, there can be two kinds of request > sockets in the accept queue. > > 3WHS -> accept : TCP_ESTABLISHED > accept -> 3WHS : TCP_SYN_RECV > > Unlike TCP_ESTABLISHED socket, accept() does not free the request socket > for TCP_SYN_RECV socket. It is freed later at reqsk_fastopen_remove(). > Also, it accesses request_sock.rsk_listener. So, in order to complete TFO > socket migration, we have to set the current listener to it at accept() > before reqsk_fastopen_remove(). > > Moreover, if TFO request caused RST before 3WHS has completed, it is held > in the listener's TFO queue to prevent DDoS attack. Thus, we also have to > migrate the requests in TFO queue. > > Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp> > --- > net/ipv4/inet_connection_sock.c | 35 ++++++++++++++++++++++++++++++++- > 1 file changed, 34 insertions(+), 1 deletion(-) > > diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c > index b27241ea96bd..361efe55b1ad 100644 > --- a/net/ipv4/inet_connection_sock.c > +++ b/net/ipv4/inet_connection_sock.c > @@ -500,6 +500,16 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) > tcp_rsk(req)->tfo_listener) { > spin_lock_bh(&queue->fastopenq.lock); > if (tcp_rsk(req)->tfo_listener) { > + if (req->rsk_listener != sk) { > + /* TFO request was migrated to another listener so > + * the new listener must be used in reqsk_fastopen_remove() > + * to hold requests which cause RST. > + */ > + sock_put(req->rsk_listener); > + sock_hold(sk); > + req->rsk_listener = sk; > + } > + > /* We are still waiting for the final ACK from 3WHS > * so can't free req now. Instead, we set req->sk to > * NULL to signify that the child socket is taken @@ -954,7 +964,6 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, > > if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { > BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); > - BUG_ON(sk != req->rsk_listener); > > /* Paranoid, to prevent race condition if > * an inbound pkt destined for child is > @@ -995,6 +1004,7 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_add); > void inet_csk_reqsk_queue_migrate(struct sock *sk, struct sock *nsk) > { > struct request_sock_queue *old_accept_queue, *new_accept_queue; > + struct fastopen_queue *old_fastopenq, *new_fastopenq; > > old_accept_queue = &inet_csk(sk)->icsk_accept_queue; > new_accept_queue = &inet_csk(nsk)->icsk_accept_queue; > @@ -1019,6 +1029,29 @@ void inet_csk_reqsk_queue_migrate(struct sock *sk, struct sock *nsk) > > spin_unlock(&new_accept_queue->rskq_lock); > spin_unlock(&old_accept_queue->rskq_lock); > + > + old_fastopenq = &old_accept_queue->fastopenq; > + new_fastopenq = &new_accept_queue->fastopenq; > + > + spin_lock_bh(&old_fastopenq->lock); > + spin_lock_bh(&new_fastopenq->lock); Same remark about lockdep being not happy with this (I guess) > + > + new_fastopenq->qlen += old_fastopenq->qlen; > + old_fastopenq->qlen = 0; > + > + if (old_fastopenq->rskq_rst_head) { > + if (new_fastopenq->rskq_rst_head) > + old_fastopenq->rskq_rst_tail->dl_next = new_fastopenq->rskq_rst_head; > + else > + old_fastopenq->rskq_rst_tail = new_fastopenq->rskq_rst_tail; > + > + new_fastopenq->rskq_rst_head = old_fastopenq->rskq_rst_head; > + old_fastopenq->rskq_rst_head = NULL; > + old_fastopenq->rskq_rst_tail = NULL; > + } > + > + spin_unlock_bh(&new_fastopenq->lock); > + spin_unlock_bh(&old_fastopenq->lock); > } > EXPORT_SYMBOL(inet_csk_reqsk_queue_migrate); > >
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b27241ea96bd..361efe55b1ad 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -500,6 +500,16 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) tcp_rsk(req)->tfo_listener) { spin_lock_bh(&queue->fastopenq.lock); if (tcp_rsk(req)->tfo_listener) { + if (req->rsk_listener != sk) { + /* TFO request was migrated to another listener so + * the new listener must be used in reqsk_fastopen_remove() + * to hold requests which cause RST. + */ + sock_put(req->rsk_listener); + sock_hold(sk); + req->rsk_listener = sk; + } + /* We are still waiting for the final ACK from 3WHS * so can't free req now. Instead, we set req->sk to * NULL to signify that the child socket is taken @@ -954,7 +964,6 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); - BUG_ON(sk != req->rsk_listener); /* Paranoid, to prevent race condition if * an inbound pkt destined for child is @@ -995,6 +1004,7 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_add); void inet_csk_reqsk_queue_migrate(struct sock *sk, struct sock *nsk) { struct request_sock_queue *old_accept_queue, *new_accept_queue; + struct fastopen_queue *old_fastopenq, *new_fastopenq; old_accept_queue = &inet_csk(sk)->icsk_accept_queue; new_accept_queue = &inet_csk(nsk)->icsk_accept_queue; @@ -1019,6 +1029,29 @@ void inet_csk_reqsk_queue_migrate(struct sock *sk, struct sock *nsk) spin_unlock(&new_accept_queue->rskq_lock); spin_unlock(&old_accept_queue->rskq_lock); + + old_fastopenq = &old_accept_queue->fastopenq; + new_fastopenq = &new_accept_queue->fastopenq; + + spin_lock_bh(&old_fastopenq->lock); + spin_lock_bh(&new_fastopenq->lock); + + new_fastopenq->qlen += old_fastopenq->qlen; + old_fastopenq->qlen = 0; + + if (old_fastopenq->rskq_rst_head) { + if (new_fastopenq->rskq_rst_head) + old_fastopenq->rskq_rst_tail->dl_next = new_fastopenq->rskq_rst_head; + else + old_fastopenq->rskq_rst_tail = new_fastopenq->rskq_rst_tail; + + new_fastopenq->rskq_rst_head = old_fastopenq->rskq_rst_head; + old_fastopenq->rskq_rst_head = NULL; + old_fastopenq->rskq_rst_tail = NULL; + } + + spin_unlock_bh(&new_fastopenq->lock); + spin_unlock_bh(&old_fastopenq->lock); } EXPORT_SYMBOL(inet_csk_reqsk_queue_migrate);