Message ID | 20210212232214.2869897-3-eric.dumazet@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tcp: mem pressure vs SO_RCVLOWAT | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for net-next |
netdev/subject_prefix | success | Link |
netdev/cc_maintainers | warning | 2 maintainers not CCed: yoshfuji@linux-ipv6.org dsahern@kernel.org |
netdev/source_inline | success | Was 1 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 1496 this patch: 1496 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 66 lines checked |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 1498 this patch: 1498 |
netdev/header_inline | success | Link |
netdev/stable | success | Stable not CCed |
On Fri, Feb 12, 2021 at 3:22 PM Eric Dumazet <eric.dumazet@gmail.com> wrote: > > From: Eric Dumazet <edumazet@google.com> > > Both tcp_data_ready() and tcp_stream_is_readable() share the same logic. > > Add tcp_epollin_ready() helper to avoid duplication. > > Signed-off-by: Eric Dumazet <edumazet@google.com> > Cc: Arjun Roy <arjunroy@google.com> > Cc: Wei Wang <weiwan@google.com> > --- > include/net/tcp.h | 12 ++++++++++++ > net/ipv4/tcp.c | 16 ++++------------ > net/ipv4/tcp_input.c | 11 ++--------- > 3 files changed, 18 insertions(+), 21 deletions(-) > > diff --git a/include/net/tcp.h b/include/net/tcp.h > index 244208f6f6c2ace87920b633e469421f557427a6..484eb2362645fd478f59b26b42457ecf4510eb14 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -1442,6 +1442,18 @@ static inline bool tcp_rmem_pressure(const struct sock *sk) > return atomic_read(&sk->sk_rmem_alloc) > threshold; > } > > +static inline bool tcp_epollin_ready(const struct sock *sk, int target) > +{ > + const struct tcp_sock *tp = tcp_sk(sk); > + int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq); > + > + if (avail <= 0) > + return false; > + > + return (avail >= target) || tcp_rmem_pressure(sk) || > + (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss); > +} > + > extern void tcp_openreq_init_rwin(struct request_sock *req, > const struct sock *sk_listener, > const struct dst_entry *dst); > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > index 9896ca10bb340924b779cb6a7606d57fdd5c3357..7a6b58ae408d1fb1e5536ccfed8215be123f3b57 100644 > --- a/net/ipv4/tcp.c > +++ b/net/ipv4/tcp.c > @@ -481,19 +481,11 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) > } > } > > -static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, > - int target, struct sock *sk) > +static bool tcp_stream_is_readable(struct sock *sk, int target) > { > - int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq); > + if (tcp_epollin_ready(sk, target)) > + return true; > > - if (avail > 0) { > - if (avail >= target) > - return true; > - if (tcp_rmem_pressure(sk)) > - return true; > - if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss) > - return true; > - } > if (sk->sk_prot->stream_memory_read) > return sk->sk_prot->stream_memory_read(sk); > return false; > @@ -568,7 +560,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) > tp->urg_data) > target++; > > - if (tcp_stream_is_readable(tp, target, sk)) > + if (tcp_stream_is_readable(sk, target)) > mask |= EPOLLIN | EPOLLRDNORM; > > if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c > index a8f8f98159531e5d1c80660972148986f6acd20a..e32a7056cb7640c67ef2d6a4d9484684d2602fcd 100644 > --- a/net/ipv4/tcp_input.c > +++ b/net/ipv4/tcp_input.c > @@ -4924,15 +4924,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) > > void tcp_data_ready(struct sock *sk) > { > - const struct tcp_sock *tp = tcp_sk(sk); > - int avail = tp->rcv_nxt - tp->copied_seq; > - > - if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && > - !sock_flag(sk, SOCK_DONE) && Seems "!sock_flag(sk, SOCK_DONE)" is not checked in tcp_epollin_read(). Does it matter? > - tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss) > - return; > - > - sk->sk_data_ready(sk); > + if (tcp_epollin_ready(sk, sk->sk_rcvlowat)) > + sk->sk_data_ready(sk); > } > > static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) > -- > 2.30.0.478.g8a0d178c01-goog >
On Sat, Feb 13, 2021 at 1:30 AM Wei Wang <weiwan@google.com> wrote: > > > void tcp_data_ready(struct sock *sk) > > { > > - const struct tcp_sock *tp = tcp_sk(sk); > > - int avail = tp->rcv_nxt - tp->copied_seq; > > - > > - if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && > > - !sock_flag(sk, SOCK_DONE) && > > Seems "!sock_flag(sk, SOCK_DONE)" is not checked in > tcp_epollin_read(). Does it matter? > Yes, probably, good catch. Not sure where tcp_poll() gets this, I have to double check. Thanks !
On Sat, Feb 13, 2021 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote: > > On Sat, Feb 13, 2021 at 1:30 AM Wei Wang <weiwan@google.com> wrote: > > > > > > void tcp_data_ready(struct sock *sk) > > > { > > > - const struct tcp_sock *tp = tcp_sk(sk); > > > - int avail = tp->rcv_nxt - tp->copied_seq; > > > - > > > - if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && > > > - !sock_flag(sk, SOCK_DONE) && > > > > Seems "!sock_flag(sk, SOCK_DONE)" is not checked in > > tcp_epollin_read(). Does it matter? > > > > > Yes, probably, good catch. > > Not sure where tcp_poll() gets this, I have to double check. It gets the info from sk->sk_hutdown & RCV_SHUTDOWN tcp_find() sets both sk->sk_shutdown |= RCV_SHUTDOWN and sock_set_flag(sk, SOCK_DONE); This seems to suggest tcp_fin() could call sk->sk_data_ready() so that we do not have to test for this unlikely condition in tcp_data_ready()
On Sat, Feb 13, 2021 at 12:05 AM Eric Dumazet <edumazet@google.com> wrote: > > On Sat, Feb 13, 2021 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote: > > > > On Sat, Feb 13, 2021 at 1:30 AM Wei Wang <weiwan@google.com> wrote: > > > > > > > > > void tcp_data_ready(struct sock *sk) > > > > { > > > > - const struct tcp_sock *tp = tcp_sk(sk); > > > > - int avail = tp->rcv_nxt - tp->copied_seq; > > > > - > > > > - if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && > > > > - !sock_flag(sk, SOCK_DONE) && > > > > > > Seems "!sock_flag(sk, SOCK_DONE)" is not checked in > > > tcp_epollin_read(). Does it matter? > > > > > > > > > Yes, probably, good catch. > > > > Not sure where tcp_poll() gets this, I have to double check. > > It gets the info from sk->sk_hutdown & RCV_SHUTDOWN > > tcp_find() sets both sk->sk_shutdown |= RCV_SHUTDOWN and > sock_set_flag(sk, SOCK_DONE); > > This seems to suggest tcp_fin() could call sk->sk_data_ready() so that > we do not have to test for this unlikely condition in tcp_data_ready() When a thread is subsequently then woken up due to sk_data_ready(), and it calls tcp_stream_is_readable() but we had lowat > 1 set, is there a chance of that thread then thinking that the stream is not readable, despite SOCK_DONE being set? This is assuming that the check is not added to the refactored logic. Note that on a related note if the tcp memory pressure check (for system-wide pressure) is added just to the original code in tcp_data_ready() but not added to tcp_stream_is_readable() we had this kind of issue (sk_data_ready() was called but tcp_stream_is_readable() returned false). -Arjun -Arjun
On Sat, Feb 13, 2021 at 9:10 AM Arjun Roy <arjunroy@google.com> wrote: > > On Sat, Feb 13, 2021 at 12:05 AM Eric Dumazet <edumazet@google.com> wrote: > > > > On Sat, Feb 13, 2021 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote: > > > > > > On Sat, Feb 13, 2021 at 1:30 AM Wei Wang <weiwan@google.com> wrote: > > > > > > > > > > > > void tcp_data_ready(struct sock *sk) > > > > > { > > > > > - const struct tcp_sock *tp = tcp_sk(sk); > > > > > - int avail = tp->rcv_nxt - tp->copied_seq; > > > > > - > > > > > - if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && > > > > > - !sock_flag(sk, SOCK_DONE) && > > > > > > > > Seems "!sock_flag(sk, SOCK_DONE)" is not checked in > > > > tcp_epollin_read(). Does it matter? > > > > > > > > > > > > > Yes, probably, good catch. > > > > > > Not sure where tcp_poll() gets this, I have to double check. > > > > It gets the info from sk->sk_hutdown & RCV_SHUTDOWN > > > > tcp_find() sets both sk->sk_shutdown |= RCV_SHUTDOWN and > > sock_set_flag(sk, SOCK_DONE); > > > > This seems to suggest tcp_fin() could call sk->sk_data_ready() so that > > we do not have to test for this unlikely condition in tcp_data_ready() > > When a thread is subsequently then woken up due to sk_data_ready(), > and it calls tcp_stream_is_readable() but we had lowat > 1 set, is > there a chance of that thread then thinking that the stream is not > readable, despite SOCK_DONE being set? This is assuming that the check > is not added to the refactored logic. > > Note that on a related note if the tcp memory pressure check (for > system-wide pressure) is added just to the original code in > tcp_data_ready() but not added to tcp_stream_is_readable() we had this > kind of issue (sk_data_ready() was called but tcp_stream_is_readable() > returned false). > Disregard, I just saw your followup patch. So I guess it's fine. -Arjun
diff --git a/include/net/tcp.h b/include/net/tcp.h index 244208f6f6c2ace87920b633e469421f557427a6..484eb2362645fd478f59b26b42457ecf4510eb14 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1442,6 +1442,18 @@ static inline bool tcp_rmem_pressure(const struct sock *sk) return atomic_read(&sk->sk_rmem_alloc) > threshold; } +static inline bool tcp_epollin_ready(const struct sock *sk, int target) +{ + const struct tcp_sock *tp = tcp_sk(sk); + int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq); + + if (avail <= 0) + return false; + + return (avail >= target) || tcp_rmem_pressure(sk) || + (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss); +} + extern void tcp_openreq_init_rwin(struct request_sock *req, const struct sock *sk_listener, const struct dst_entry *dst); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9896ca10bb340924b779cb6a7606d57fdd5c3357..7a6b58ae408d1fb1e5536ccfed8215be123f3b57 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -481,19 +481,11 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) } } -static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, - int target, struct sock *sk) +static bool tcp_stream_is_readable(struct sock *sk, int target) { - int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq); + if (tcp_epollin_ready(sk, target)) + return true; - if (avail > 0) { - if (avail >= target) - return true; - if (tcp_rmem_pressure(sk)) - return true; - if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss) - return true; - } if (sk->sk_prot->stream_memory_read) return sk->sk_prot->stream_memory_read(sk); return false; @@ -568,7 +560,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) tp->urg_data) target++; - if (tcp_stream_is_readable(tp, target, sk)) + if (tcp_stream_is_readable(sk, target)) mask |= EPOLLIN | EPOLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a8f8f98159531e5d1c80660972148986f6acd20a..e32a7056cb7640c67ef2d6a4d9484684d2602fcd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4924,15 +4924,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) void tcp_data_ready(struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); - int avail = tp->rcv_nxt - tp->copied_seq; - - if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && - !sock_flag(sk, SOCK_DONE) && - tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss) - return; - - sk->sk_data_ready(sk); + if (tcp_epollin_ready(sk, sk->sk_rcvlowat)) + sk->sk_data_ready(sk); } static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)