Message ID | 20230707-mptcp-unify-sockopt-issue-353-v1-5-693e15c06646@tessares.net (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Paolo Abeni |
Headers | show |
Series | mptcp: sockopt: uniform code to get/set values | expand |
Context | Check | Description |
---|---|---|
matttbe/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 201 lines checked |
matttbe/build | fail | Build error with: -Werror |
matttbe/KVM_Validation__normal__except_selftest_mptcp_join_ | success | Success! ✅ |
matttbe/KVM_Validation__debug__only_selftest_mptcp_join_ | success | Success! ✅ |
matttbe/KVM_Validation__debug__except_selftest_mptcp_join_ | success | Success! ✅ |
matttbe/KVM_Validation__normal__only_selftest_mptcp_join_ | success | Success! ✅ |
On Fri, 2023-07-07 at 18:00 +0200, Matthieu Baerts wrote: > Currently, to set socket option, we mostly deal with them case by case > by setting the right field in the socket structure and in fact, copying > what is done elsewhere in SOL_IP(V6). Supporting a new option is then > more complex and we can see some disparities on how things are done > between the different options. > > Instead, we can rely on ip(v6)_setsockopt() to set the different values. > By doing that, we uniform the way we deal with the different options and > we can also go through the BPF et Netlink hooks if needed. We can also > easily support new options later. > > Note that there are still two cases where we still need to deal with the > end values directly, thus with something specific case by case: > > - For SOL_TCP socket options because we cannot store them in the MPTCP > socket. Note we can also store info in the different subflows (and > create a first one if there is no subflow) and retrieve info from > there. > > - To sync the socket options when a new subflow has been created to > minimise the work. But maybe it is fine to call some getsockopt() / > setsockopt(). We could also save a list of setsockopt() that have been > done in the past and only re-do these ones. That should help with the > maintenance of these socket options and ease the support of new ones. > > Link: https://github.com/multipath-tcp/mptcp_net-next/issues/353 > Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net> > --- > net/mptcp/sockopt.c | 148 ++++++++++++++++++---------------------------------- > 1 file changed, 50 insertions(+), 98 deletions(-) > > diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c > index f0007fa7a0a8..a17739683fdb 100644 > --- a/net/mptcp/sockopt.c > +++ b/net/mptcp/sockopt.c > @@ -385,51 +385,6 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, > return -EOPNOTSUPP; > } > > -static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, > - sockptr_t optval, unsigned int optlen) > -{ > - struct sock *sk = (struct sock *)msk; > - int ret = -EOPNOTSUPP; > - struct socket *ssock; > - > - switch (optname) { > - case IPV6_V6ONLY: > - case IPV6_TRANSPARENT: > - case IPV6_FREEBIND: > - lock_sock(sk); > - ssock = __mptcp_nmpc_socket(msk); > - if (IS_ERR(ssock)) { > - release_sock(sk); > - return PTR_ERR(ssock); > - } > - > - ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); > - if (ret != 0) { > - release_sock(sk); > - return ret; > - } > - > - sockopt_seq_inc(msk); > - > - switch (optname) { > - case IPV6_V6ONLY: > - sk->sk_ipv6only = ssock->sk->sk_ipv6only; > - break; > - case IPV6_TRANSPARENT: > - inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent; > - break; > - case IPV6_FREEBIND: > - inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind; > - break; > - } > - > - release_sock(sk); > - break; > - } > - > - return ret; > -} > - > static bool mptcp_supported_sockopt(int level, int optname) > { > if (level == SOL_IP) { > @@ -700,94 +655,65 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op > return 0; > } > > -static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, > - sockptr_t optval, unsigned int optlen) > +static int mptcp_setsockopt_msk(struct mptcp_sock *msk, int level, int optname, > + sockptr_t optval, unsigned int optlen) > { > struct sock *sk = (struct sock *)msk; > - struct inet_sock *issk; > - struct socket *ssock; > - int err; > > - err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); > - if (err != 0) > - return err; > + /* We cannot use tcp_setsockopt() with the msk */ > + if (level == SOL_IP) > + return ip_setsockopt(sk, level, optname, optval, optlen); > > - lock_sock(sk); > + if (level == SOL_IPV6) > + return ipv6_setsockopt(sk, level, optname, optval, optlen); > > - ssock = __mptcp_nmpc_socket(msk); > - if (IS_ERR(ssock)) { > - release_sock(sk); > - return PTR_ERR(ssock); > - } > + if (level == SOL_TCP) > + return 0; > > - issk = inet_sk(ssock->sk); > - > - switch (optname) { > - case IP_FREEBIND: > - issk->freebind = inet_sk(sk)->freebind; > - break; > -- case IP_TRANSPARENT: > - issk->transparent = inet_sk(sk)->transparent; > - break; > - default: > - release_sock(sk); > - WARN_ON_ONCE(1); > - return -EOPNOTSUPP; > - } > - > - sockopt_seq_inc(msk); > - release_sock(sk); > - return 0; > + return -EOPNOTSUPP; > } > > -static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, > - sockptr_t optval, unsigned int optlen) > +static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, > + int optname, sockptr_t optval, > + unsigned int optlen) > { > struct mptcp_subflow_context *subflow; > struct sock *sk = (struct sock *)msk; > - int err, val; > + int err; > > - err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); > + err = mptcp_setsockopt_msk(msk, level, optname, optval, optlen); > > if (err != 0) > return err; > > lock_sock(sk); > sockopt_seq_inc(msk); > - val = inet_sk(sk)->tos; > mptcp_for_each_subflow(msk, subflow) { > struct sock *ssk = mptcp_subflow_tcp_sock(subflow); > > - __ip_sock_set_tos(ssk, val); > + tcp_setsockopt(ssk, level, optname, optval, optlen); The above ignores the tcp_setsockopt() return value. What if the sockopt fails on a single subflow? e.g. the 2nd one? Perhaps not possible with the currently supported sockopt, but if the goal is to extend the support in a generic way, we will likely hit that. Note that even saving and e.v. restoring the value on failure is prone to the same problem (restoring could fail, too). > } > release_sock(sk); > > return 0; > } > > -static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, > - sockptr_t optval, unsigned int optlen) > -{ > - switch (optname) { > - case IP_FREEBIND: > - case IP_TRANSPARENT: > - return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); > - case IP_TOS: > - return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); > - } > - > - return -EOPNOTSUPP; > -} > - > static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, > sockptr_t optval, unsigned int optlen) > { > struct sock *sk = (struct sock *)msk; > struct socket *sock; > + struct sock *ssk; > int ret; > > - /* Limit to first subflow, before the connection establishment */ > + /* Limit to first subflow */ > lock_sock(sk); > + ssk = msk->first; > + if (ssk) { > + ret = tcp_setsockopt(ssk, level, optname, optval, optlen); > + goto unlock; > + } Why do you need the above chunk? This change the behavior of existing sockopt and possibly their return value in case of error?!? > + > sock = __mptcp_nmpc_socket(msk); > if (IS_ERR(sock)) { > ret = PTR_ERR(sock); > @@ -801,6 +727,32 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int > return ret; > } > > +static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, > + sockptr_t optval, unsigned int optlen) > +{ > + switch (optname) { > + case IP_FREEBIND: > + case IP_TRANSPARENT: > + case IP_TOS: > + return mptcp_setsockopt_all_sf(msk, SOL_IP, optname, optval, optlen); This changes the sockopt semantic a bit, applying the IP_FREEBIND/IP_TRANSPARENT to all the subflows, while before this change, only the MPC subflow was rightfull affected. Likely in practice there are no visible functional effect change, but it sounds confusing to me. Cheers, Paolo
Hi Paolo, Thank you for the reviews! On 10/07/2023 19:05, Paolo Abeni wrote: > On Fri, 2023-07-07 at 18:00 +0200, Matthieu Baerts wrote: >> Currently, to set socket option, we mostly deal with them case by case >> by setting the right field in the socket structure and in fact, copying >> what is done elsewhere in SOL_IP(V6). Supporting a new option is then >> more complex and we can see some disparities on how things are done >> between the different options. >> >> Instead, we can rely on ip(v6)_setsockopt() to set the different values. >> By doing that, we uniform the way we deal with the different options and >> we can also go through the BPF et Netlink hooks if needed. We can also >> easily support new options later. (...) >> -static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, >> - sockptr_t optval, unsigned int optlen) >> +static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, >> + int optname, sockptr_t optval, >> + unsigned int optlen) >> { >> struct mptcp_subflow_context *subflow; >> struct sock *sk = (struct sock *)msk; >> - int err, val; >> + int err; >> >> - err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); >> + err = mptcp_setsockopt_msk(msk, level, optname, optval, optlen); >> >> if (err != 0) >> return err; >> >> lock_sock(sk); >> sockopt_seq_inc(msk); >> - val = inet_sk(sk)->tos; >> mptcp_for_each_subflow(msk, subflow) { >> struct sock *ssk = mptcp_subflow_tcp_sock(subflow); >> >> - __ip_sock_set_tos(ssk, val); >> + tcp_setsockopt(ssk, level, optname, optval, optlen); > > The above ignores the tcp_setsockopt() return value. What if the > sockopt fails on a single subflow? e.g. the 2nd one? > > Perhaps not possible with the currently supported sockopt, but if the > goal is to extend the support in a generic way, we will likely hit > that. Note that even saving and e.v. restoring the value on failure is > prone to the same problem (restoring could fail, too). That was somehow the behaviour we had before: we use setsockopt() on the MPTCP sock and we stop if there is an error. If not, it means the parameters are fine and we can propagate the value to all subflows. Before, we were not looking for errors when setting the value on the different subflows. Now we use the "normal" flow to set this parameter on the different subflows and if the corresponding sockopt has additional checks before setting the value (or if some are added later and we missed that), that's even better because we are no longer forcing the modification of a parameter if it was not supposed to be done :) I'm not a big fan of ignoring errors but as you said, we cannot return all the different errors with the current API. Or we would need something like: > ret = setsockopt(fd, SOL_MPTCP, MPTCP_SETSOCKOPT, {level=X, name=Y, val=&Z, err=&E}, len); But that's a different topic and a new API specific to MPTCP. >> } >> release_sock(sk); >> >> return 0; >> } >> >> -static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, >> - sockptr_t optval, unsigned int optlen) >> -{ >> - switch (optname) { >> - case IP_FREEBIND: >> - case IP_TRANSPARENT: >> - return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); >> - case IP_TOS: >> - return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); >> - } >> - >> - return -EOPNOTSUPP; >> -} >> - >> static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, >> sockptr_t optval, unsigned int optlen) >> { >> struct sock *sk = (struct sock *)msk; >> struct socket *sock; >> + struct sock *ssk; >> int ret; >> >> - /* Limit to first subflow, before the connection establishment */ >> + /* Limit to first subflow */ >> lock_sock(sk); >> + ssk = msk->first; >> + if (ssk) { >> + ret = tcp_setsockopt(ssk, level, optname, optval, optlen); >> + goto unlock; >> + } > > Why do you need the above chunk? This change the behavior of existing > sockopt and possibly their return value in case of error?!? I thought we could only call __mptcp_nmpc_socket() before the establishment of the connection and I wanted to be able to call setsockopt() on the first subflow even if it has been done after the establishment of the connection. I'm maybe getting confused by the modifications you did around __mptcp_nmpc_socket(). But now that I'm thinking about that, maybe it doesn't make sense what I did: if we only want to act on the first subflow, it is certainly because we want to do something before the establishment of the MPTCP connection, no? But then if the connection has been established before and if we return before doing the call to tcp_setsockopt(), we might return a different error with MPTCP than with TCP, no? >> + >> sock = __mptcp_nmpc_socket(msk); >> if (IS_ERR(sock)) { >> ret = PTR_ERR(sock); >> @@ -801,6 +727,32 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int >> return ret; >> } >> >> +static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, >> + sockptr_t optval, unsigned int optlen) >> +{ >> + switch (optname) { >> + case IP_FREEBIND: >> + case IP_TRANSPARENT: >> + case IP_TOS: >> + return mptcp_setsockopt_all_sf(msk, SOL_IP, optname, optval, optlen); > > This changes the sockopt semantic a bit, applying the > IP_FREEBIND/IP_TRANSPARENT to all the subflows, while before this > change, only the MPC subflow was rightfull affected. Likely in practice > there are no visible functional effect change, but it sounds confusing > to me. Good point. Indeed, if there is no first subflow created before the establishment of the connection, we will now only set the transparent and freebind bits on the MPTCP socket. They will be set when a new subflow has been created in the "sync()" part (which is maybe fine?) But we can call mptcp_setsockopt_first_sf_only() here if you prefer. Cheers, Matt
Hi Paolo, On 11/07/2023 11:43, Matthieu Baerts wrote: > On 10/07/2023 19:05, Paolo Abeni wrote: >> On Fri, 2023-07-07 at 18:00 +0200, Matthieu Baerts wrote: (...) >>> @@ -801,6 +727,32 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int >>> return ret; >>> } >>> >>> +static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, >>> + sockptr_t optval, unsigned int optlen) >>> +{ >>> + switch (optname) { >>> + case IP_FREEBIND: >>> + case IP_TRANSPARENT: >>> + case IP_TOS: >>> + return mptcp_setsockopt_all_sf(msk, SOL_IP, optname, optval, optlen); >> >> This changes the sockopt semantic a bit, applying the >> IP_FREEBIND/IP_TRANSPARENT to all the subflows, while before this >> change, only the MPC subflow was rightfull affected. Likely in practice >> there are no visible functional effect change, but it sounds confusing >> to me. > > Good point. Indeed, if there is no first subflow created before the > establishment of the connection, we will now only set the transparent > and freebind bits on the MPTCP socket. They will be set when a new > subflow has been created in the "sync()" part (which is maybe fine?) Thinking about that, with the same logic (the sync will be done later), we could also only call mptcp_setsockopt_msk() and avoid a lock (on a non established MPTCP connection). > But we can call mptcp_setsockopt_first_sf_only() here if you prefer. (We would also need to call mptcp_setsockopt_msk() as well) Cheers, Matt
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index f0007fa7a0a8..a17739683fdb 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -385,51 +385,6 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, return -EOPNOTSUPP; } -static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct sock *sk = (struct sock *)msk; - int ret = -EOPNOTSUPP; - struct socket *ssock; - - switch (optname) { - case IPV6_V6ONLY: - case IPV6_TRANSPARENT: - case IPV6_FREEBIND: - lock_sock(sk); - ssock = __mptcp_nmpc_socket(msk); - if (IS_ERR(ssock)) { - release_sock(sk); - return PTR_ERR(ssock); - } - - ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); - if (ret != 0) { - release_sock(sk); - return ret; - } - - sockopt_seq_inc(msk); - - switch (optname) { - case IPV6_V6ONLY: - sk->sk_ipv6only = ssock->sk->sk_ipv6only; - break; - case IPV6_TRANSPARENT: - inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent; - break; - case IPV6_FREEBIND: - inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind; - break; - } - - release_sock(sk); - break; - } - - return ret; -} - static bool mptcp_supported_sockopt(int level, int optname) { if (level == SOL_IP) { @@ -700,94 +655,65 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op return 0; } -static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) +static int mptcp_setsockopt_msk(struct mptcp_sock *msk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; - struct inet_sock *issk; - struct socket *ssock; - int err; - err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); - if (err != 0) - return err; + /* We cannot use tcp_setsockopt() with the msk */ + if (level == SOL_IP) + return ip_setsockopt(sk, level, optname, optval, optlen); - lock_sock(sk); + if (level == SOL_IPV6) + return ipv6_setsockopt(sk, level, optname, optval, optlen); - ssock = __mptcp_nmpc_socket(msk); - if (IS_ERR(ssock)) { - release_sock(sk); - return PTR_ERR(ssock); - } + if (level == SOL_TCP) + return 0; - issk = inet_sk(ssock->sk); - - switch (optname) { - case IP_FREEBIND: - issk->freebind = inet_sk(sk)->freebind; - break; - case IP_TRANSPARENT: - issk->transparent = inet_sk(sk)->transparent; - break; - default: - release_sock(sk); - WARN_ON_ONCE(1); - return -EOPNOTSUPP; - } - - sockopt_seq_inc(msk); - release_sock(sk); - return 0; + return -EOPNOTSUPP; } -static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) +static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, + int optname, sockptr_t optval, + unsigned int optlen) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int err, val; + int err; - err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); + err = mptcp_setsockopt_msk(msk, level, optname, optval, optlen); if (err != 0) return err; lock_sock(sk); sockopt_seq_inc(msk); - val = inet_sk(sk)->tos; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - __ip_sock_set_tos(ssk, val); + tcp_setsockopt(ssk, level, optname, optval, optlen); } release_sock(sk); return 0; } -static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) -{ - switch (optname) { - case IP_FREEBIND: - case IP_TRANSPARENT: - return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); - case IP_TOS: - return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); - } - - return -EOPNOTSUPP; -} - static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; struct socket *sock; + struct sock *ssk; int ret; - /* Limit to first subflow, before the connection establishment */ + /* Limit to first subflow */ lock_sock(sk); + ssk = msk->first; + if (ssk) { + ret = tcp_setsockopt(ssk, level, optname, optval, optlen); + goto unlock; + } + sock = __mptcp_nmpc_socket(msk); if (IS_ERR(sock)) { ret = PTR_ERR(sock); @@ -801,6 +727,32 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int return ret; } +static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ + switch (optname) { + case IP_FREEBIND: + case IP_TRANSPARENT: + case IP_TOS: + return mptcp_setsockopt_all_sf(msk, SOL_IP, optname, optval, optlen); + } + + return -EOPNOTSUPP; +} + +static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ + switch (optname) { + case IPV6_V6ONLY: + case IPV6_TRANSPARENT: + case IPV6_FREEBIND: + return mptcp_setsockopt_all_sf(msk, SOL_IPV6, optname, optval, optlen); + } + + return -EOPNOTSUPP; +} + static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) {
Currently, to set socket option, we mostly deal with them case by case by setting the right field in the socket structure and in fact, copying what is done elsewhere in SOL_IP(V6). Supporting a new option is then more complex and we can see some disparities on how things are done between the different options. Instead, we can rely on ip(v6)_setsockopt() to set the different values. By doing that, we uniform the way we deal with the different options and we can also go through the BPF et Netlink hooks if needed. We can also easily support new options later. Note that there are still two cases where we still need to deal with the end values directly, thus with something specific case by case: - For SOL_TCP socket options because we cannot store them in the MPTCP socket. Note we can also store info in the different subflows (and create a first one if there is no subflow) and retrieve info from there. - To sync the socket options when a new subflow has been created to minimise the work. But maybe it is fine to call some getsockopt() / setsockopt(). We could also save a list of setsockopt() that have been done in the past and only re-do these ones. That should help with the maintenance of these socket options and ease the support of new ones. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/353 Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net> --- net/mptcp/sockopt.c | 148 ++++++++++++++++++---------------------------------- 1 file changed, 50 insertions(+), 98 deletions(-)