Message ID | 67c1471683200188b96a3f712dd2e8def7978462.1628544649.git.cdleonard@gmail.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tcp: Initial support for RFC5925 auth option | expand |
Hi Leonard, On Tue, 10 Aug 2021 at 02:50, Leonard Crestez <cdleonard@gmail.com> wrote: [..] > +/* Representation of a Master Key Tuple as per RFC5925 */ > +struct tcp_authopt_key_info { > + struct hlist_node node; > + /* Local identifier */ > + u32 local_id; There is no local_id in RFC5925, what's that? An MKT is identified by (send_id, recv_id), together with (src_addr/src_port, dst_addr/dst_port). Why introducing something new to already complicated RFC? > + u32 flags; > + /* Wire identifiers */ > + u8 send_id, recv_id; > + u8 alg_id; > + u8 keylen; > + u8 key[TCP_AUTHOPT_MAXKEYLEN]; > + struct rcu_head rcu; This is unaligned and will add padding. I wonder if it's also worth saving some bytes by something like struct tcp_ao_key *key; With struct tcp_ao_key { u8 keylen; u8 key[0]; }; Hmm? > + struct sockaddr_storage addr; > +}; > + > +/* Per-socket information regarding tcp_authopt */ > +struct tcp_authopt_info { > + /* List of tcp_authopt_key_info */ > + struct hlist_head head; > + u32 flags; > + u32 src_isn; > + u32 dst_isn; > + struct rcu_head rcu; Ditto, adds padding on 64-bit. [..] > +++ b/include/uapi/linux/tcp.h > @@ -126,10 +126,12 @@ enum { > #define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ > > #define TCP_CM_INQ TCP_INQ > > #define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ > +#define TCP_AUTHOPT 38 /* TCP Authentication Option (RFC2385) */ > +#define TCP_AUTHOPT_KEY 39 /* TCP Authentication Option update key (RFC2385) */ RFC2385 is md5 one. Also, should there be TCP_AUTHOPT_ADD_KEY, TCP_AUTHOPT_DELTE_KEY instead of using flags inside setsocketopt()? (no hard feelings) [..] > +/** > + * enum tcp_authopt_flag - flags for `tcp_authopt.flags` > + */ > +enum tcp_authopt_flag { > + /** > + * @TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED: > + * Configure behavior of segments with TCP-AO coming from hosts for which no > + * key is configured. The default recommended by RFC is to silently accept > + * such connections. > + */ > + TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED = (1 << 2), > +}; > + > +/** > + * struct tcp_authopt - Per-socket options related to TCP Authentication Option > + */ > +struct tcp_authopt { > + /** @flags: Combination of &enum tcp_authopt_flag */ > + __u32 flags; > +}; I'm not sure what's the use of enum here, probably: #define TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED BIT(2) [..] > +struct tcp_authopt_key { > + /** @flags: Combination of &enum tcp_authopt_key_flag */ > + __u32 flags; > + /** @local_id: Local identifier */ > + __u32 local_id; > + /** @send_id: keyid value for send */ > + __u8 send_id; > + /** @recv_id: keyid value for receive */ > + __u8 recv_id; > + /** @alg: One of &enum tcp_authopt_alg */ > + __u8 alg; > + /** @keylen: Length of the key buffer */ > + __u8 keylen; > + /** @key: Secret key */ > + __u8 key[TCP_AUTHOPT_MAXKEYLEN]; > + /** > + * @addr: Key is only valid for this address > + * > + * Ignored unless TCP_AUTHOPT_KEY_ADDR_BIND flag is set > + */ > + struct __kernel_sockaddr_storage addr; > +}; It'll be an ABI if this is accepted. As it is - it can't support RFC5925 fully. Extending syscall ABI is painful. I think, even the initial ABI *must* support all possible features of the RFC. In other words, there must be src_addr, src_port, dst_addr and dst_port. I can see from docs you've written you don't want to support a mix of different addr/port MKTs, so you can return -EINVAL or -ENOTSUPP for any value but zero. This will create an ABI that can be later extended to fully support RFC. The same is about key: I don't think you need to define/use tcp_authopt_alg. Just use algo name - that way TCP-AO will automatically be able to use any algo supported by crypto engine. See how xfrm does it, e.g.: struct xfrm_algo_auth { char alg_name[64]; unsigned int alg_key_len; /* in bits */ unsigned int alg_trunc_len; /* in bits */ char alg_key[0]; }; So you can let a user chose maclen instead of hard-coding it. Much more extendable than what you propose. [..] > +#ifdef CONFIG_TCP_AUTHOPT > + case TCP_AUTHOPT: { > + struct tcp_authopt info; > + > + if (get_user(len, optlen)) > + return -EFAULT; > + > + lock_sock(sk); > + tcp_get_authopt_val(sk, &info); > + release_sock(sk); > + > + len = min_t(unsigned int, len, sizeof(info)); > + if (put_user(len, optlen)) > + return -EFAULT; > + if (copy_to_user(optval, &info, len)) > + return -EFAULT; > + return 0; > + } I'm pretty sure it's not a good choice to write partly tcp_authopt. And a user has no way to check what's the correct len on this kernel. Instead of len = min_t(unsigned int, len, sizeof(info)), it should be if (len != sizeof(info)) return -EINVAL; [..] > +int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen) > +{ > + struct tcp_authopt opt; > + struct tcp_authopt_info *info; > + > + WARN_ON(!lockdep_sock_is_held(sk)); sock_owned_by_me(sk) > + > + /* If userspace optlen is too short fill the rest with zeros */ > + if (optlen > sizeof(opt)) > + return -EINVAL; > + memset(&opt, 0, sizeof(opt)); it's 4 bytes, why not just (optlen != sizeof(opt))? [..] > +int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *opt) > +{ > + struct tcp_sock *tp = tcp_sk(sk); > + struct tcp_authopt_info *info; > + > + WARN_ON(!lockdep_sock_is_held(sk)); sock_owned_by_me(sk) [..] > +int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen) > +{ > + struct tcp_authopt_key opt; > + struct tcp_authopt_info *info; > + struct tcp_authopt_key_info *key_info; > + > + /* If userspace optlen is too short fill the rest with zeros */ > + if (optlen > sizeof(opt)) > + return -EINVAL; > + memset(&opt, 0, sizeof(opt)); > + if (copy_from_sockptr(&opt, optval, optlen)) > + return -EFAULT; Again, not a good practice to zero-extend struct. Enforce proper size with -EINVAL. [..] > + /* Initialize tcp_authopt_info if not already set */ > + info = __tcp_authopt_info_get_or_create(sk); > + if (IS_ERR(info)) > + return PTR_ERR(info); > + > + /* check key family */ > + if (opt.flags & TCP_AUTHOPT_KEY_ADDR_BIND) { > + if (sk->sk_family != opt.addr.ss_family) > + return -EINVAL; > + } Probably, can be in the reverse order, so that __tcp_authopt_info_get_or_create() won't allocate memory. > + /* If an old value exists for same local_id it is deleted */ > + key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id); > + if (key_info) > + tcp_authopt_key_del(sk, info, key_info); > + key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | __GFP_ZERO); > + if (!key_info) > + return -ENOMEM; 1. You don't need sock_kmalloc() together with tcp_authopt_key_del(). It just frees the memory and allocates it back straight away - no sense in doing that. 2. I think RFC says you must not allow a user to change an existing key: > MKT parameters are not changed. Instead, new MKTs can be installed, and a connection > can change which MKT it uses. IIUC, it means that one can't just change an existing MKT, but one can remove and later add MKT with the same (send_id, recv_id, src_addr/port, dst_addr/port). So, a reasonable thing to do: if (key_info) return -EEXIST. Thanks, Dmitry
On 8/10/21 11:41 PM, Dmitry Safonov wrote: > Hi Leonard, > > On Tue, 10 Aug 2021 at 02:50, Leonard Crestez <cdleonard@gmail.com> wrote: > [..] >> +/* Representation of a Master Key Tuple as per RFC5925 */ >> +struct tcp_authopt_key_info { >> + struct hlist_node node; >> + /* Local identifier */ >> + u32 local_id; > > There is no local_id in RFC5925, what's that? > An MKT is identified by (send_id, recv_id), together with > (src_addr/src_port, dst_addr/dst_port). > Why introducing something new to already complicated RFC? It was there to simplify user interface and initial implementation. But it seems that BGP listeners already needs to support multiple keychains for different peers so identifying the key by (send_id, recv_id, binding) is easier for userspace to work with. Otherwise they need to create their own local_id mapping internally. >> + u32 flags; >> + /* Wire identifiers */ >> + u8 send_id, recv_id; >> + u8 alg_id; >> + u8 keylen; >> + u8 key[TCP_AUTHOPT_MAXKEYLEN]; >> + struct rcu_head rcu; > > This is unaligned and will add padding. Not clear padding matters. Moving rcu_head higher would avoid it, is that what you're suggesting. > I wonder if it's also worth saving some bytes by something like > struct tcp_ao_key *key; > > With > struct tcp_ao_key { > u8 keylen; > u8 key[0]; > }; > > Hmm? This increases memory management complexity for very minor gains. Very few tcp_authopt_key will ever be created. >> + struct sockaddr_storage addr; >> +}; >> + >> +/* Per-socket information regarding tcp_authopt */ >> +struct tcp_authopt_info { >> + /* List of tcp_authopt_key_info */ >> + struct hlist_head head; >> + u32 flags; >> + u32 src_isn; >> + u32 dst_isn; >> + struct rcu_head rcu; > > Ditto, adds padding on 64-bit. > > [..] >> +++ b/include/uapi/linux/tcp.h >> @@ -126,10 +126,12 @@ enum { >> #define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ >> >> #define TCP_CM_INQ TCP_INQ >> >> #define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ >> +#define TCP_AUTHOPT 38 /* TCP Authentication Option (RFC2385) */ >> +#define TCP_AUTHOPT_KEY 39 /* TCP Authentication Option update key (RFC2385) */ > > RFC2385 is md5 one. > Also, should there be TCP_AUTHOPT_ADD_KEY, TCP_AUTHOPT_DELTE_KEY > instead of using flags inside setsocketopt()? (no hard feelings) Fixed RFC reference. TCP_AUTHOPT_DELETE_KEY could be clearer, I just wanted to avoid bloating the sockopt space. But there's not any scarcity. For reference tcp_md5 handles key deletion based on keylen == 0. This seems wrong to me, an empty key is in fact valid though not realistic. If local_id is removed in favor of "full match on id and binding" then the delete sockopt would still need most of a full struct tcp_authopt_key anyway. > [..] >> +/** >> + * enum tcp_authopt_flag - flags for `tcp_authopt.flags` >> + */ >> +enum tcp_authopt_flag { >> + /** >> + * @TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED: >> + * Configure behavior of segments with TCP-AO coming from hosts for which no >> + * key is configured. The default recommended by RFC is to silently accept >> + * such connections. >> + */ >> + TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED = (1 << 2), >> +}; >> + >> +/** >> + * struct tcp_authopt - Per-socket options related to TCP Authentication Option >> + */ >> +struct tcp_authopt { >> + /** @flags: Combination of &enum tcp_authopt_flag */ >> + __u32 flags; >> +}; > > I'm not sure what's the use of enum here, probably: > #define TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED BIT(2) This is an enum because it looks nice in kernel-doc. I couldn't find a way to attach docs to a macro and include it somewhere else. BTW, the enum gains more members later. As for BIT() it doesn't see to be allowed in uapi and there were recent changes removing such usage. > [..] >> +struct tcp_authopt_key { >> + /** @flags: Combination of &enum tcp_authopt_key_flag */ >> + __u32 flags; >> + /** @local_id: Local identifier */ >> + __u32 local_id; >> + /** @send_id: keyid value for send */ >> + __u8 send_id; >> + /** @recv_id: keyid value for receive */ >> + __u8 recv_id; >> + /** @alg: One of &enum tcp_authopt_alg */ >> + __u8 alg; >> + /** @keylen: Length of the key buffer */ >> + __u8 keylen; >> + /** @key: Secret key */ >> + __u8 key[TCP_AUTHOPT_MAXKEYLEN]; >> + /** >> + * @addr: Key is only valid for this address >> + * >> + * Ignored unless TCP_AUTHOPT_KEY_ADDR_BIND flag is set >> + */ >> + struct __kernel_sockaddr_storage addr; >> +}; > > It'll be an ABI if this is accepted. As it is - it can't support RFC5925 fully. > Extending syscall ABI is painful. I think, even the initial ABI *must* support > all possible features of the RFC. > In other words, there must be src_addr, src_port, dst_addr and dst_port. > I can see from docs you've written you don't want to support a mix of different > addr/port MKTs, so you can return -EINVAL or -ENOTSUPP for any value > but zero. > This will create an ABI that can be later extended to fully support RFC. RFC states that MKT connection identifiers can be specified using ranges and wildcards and the details are up to the implementation. Keys are *NOT* just bound to a classical TCP 4-tuple. * src_addr and src_port is implicit in socket binding. Maybe in theory src_addr they could apply for a server socket bound to 0.0.0.0:PORT but userspace can just open more sockets. * dst_port is not supported by MD5 and I can't think of any useful usecase. This is either well known (179 for BGP) or auto-allocated. * tcp_md5 was recently enhanced allow a "prefixlen" for addr and "l3mdev" ifindex binding. This last point shows that the binding features people require can't be easily predicted in advance so it's better to allow the rules to be extended. > The same is about key: I don't think you need to define/use tcp_authopt_alg. > Just use algo name - that way TCP-AO will automatically be able to use > any algo supported by crypto engine. > See how xfrm does it, e.g.: > struct xfrm_algo_auth { > char alg_name[64]; > unsigned int alg_key_len; /* in bits */ > unsigned int alg_trunc_len; /* in bits */ > char alg_key[0]; > }; > > So you can let a user chose maclen instead of hard-coding it. > Much more extendable than what you propose. This complicates ABI and implementation with features that are not needed. I'd much rather only expose an enum of real-world tcp-ao algorithms. > [..] >> +#ifdef CONFIG_TCP_AUTHOPT >> + case TCP_AUTHOPT: { >> + struct tcp_authopt info; >> + >> + if (get_user(len, optlen)) >> + return -EFAULT; >> + >> + lock_sock(sk); >> + tcp_get_authopt_val(sk, &info); >> + release_sock(sk); >> + >> + len = min_t(unsigned int, len, sizeof(info)); >> + if (put_user(len, optlen)) >> + return -EFAULT; >> + if (copy_to_user(optval, &info, len)) >> + return -EFAULT; >> + return 0; >> + } > > I'm pretty sure it's not a good choice to write partly tcp_authopt. > And a user has no way to check what's the correct len on this kernel. > Instead of len = min_t(unsigned int, len, sizeof(info)), it should be > if (len != sizeof(info)) > return -EINVAL; Purpose is to allow sockopts to grow as md5 has grown. > [..] >> +int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen) >> +{ >> + struct tcp_authopt opt; >> + struct tcp_authopt_info *info; >> + >> + WARN_ON(!lockdep_sock_is_held(sk)); > > sock_owned_by_me(sk) Yes, this seems better. I think there are several socket locking mistakes in later commits. The patch adding support for detailed keyid control probably needs bh_lock_sock to avoid races between key selection and caching on send and key manipulation by userspace. >> + /* If userspace optlen is too short fill the rest with zeros */ >> + if (optlen > sizeof(opt)) >> + return -EINVAL; >> + memset(&opt, 0, sizeof(opt)); > > it's 4 bytes, why not just (optlen != sizeof(opt))? It's extended in a later commit to add detailed keyid control. > [..] >> +int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *opt) >> +{ >> + struct tcp_sock *tp = tcp_sk(sk); >> + struct tcp_authopt_info *info; >> + >> + WARN_ON(!lockdep_sock_is_held(sk)); > > sock_owned_by_me(sk) > > [..] >> +int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen) >> +{ >> + struct tcp_authopt_key opt; >> + struct tcp_authopt_info *info; >> + struct tcp_authopt_key_info *key_info; >> + >> + /* If userspace optlen is too short fill the rest with zeros */ >> + if (optlen > sizeof(opt)) >> + return -EINVAL; >> + memset(&opt, 0, sizeof(opt)); >> + if (copy_from_sockptr(&opt, optval, optlen)) >> + return -EFAULT; > > Again, not a good practice to zero-extend struct. Enforce proper size > with -EINVAL. > > [..] >> + /* Initialize tcp_authopt_info if not already set */ >> + info = __tcp_authopt_info_get_or_create(sk); >> + if (IS_ERR(info)) >> + return PTR_ERR(info); >> + >> + /* check key family */ >> + if (opt.flags & TCP_AUTHOPT_KEY_ADDR_BIND) { >> + if (sk->sk_family != opt.addr.ss_family) >> + return -EINVAL; >> + } > > Probably, can be in the reverse order, so that > __tcp_authopt_info_get_or_create() > won't allocate memory. OK. It only saves a tiny bit of memory on API misuse. >> + /* If an old value exists for same local_id it is deleted */ >> + key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id); >> + if (key_info) >> + tcp_authopt_key_del(sk, info, key_info); >> + key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | __GFP_ZERO); >> + if (!key_info) >> + return -ENOMEM; > > 1. You don't need sock_kmalloc() together with tcp_authopt_key_del(). > It just frees the memory and allocates it back straight away - no > sense in doing that. The list is scanned in multiple places in later commits using nothing but an rcu_read_lock, this means that keys can't be updated in-place. > 2. I think RFC says you must not allow a user to change an existing key: >> MKT parameters are not changed. Instead, new MKTs can be installed, and a connection >> can change which MKT it uses. > > IIUC, it means that one can't just change an existing MKT, but one can > remove and later > add MKT with the same (send_id, recv_id, src_addr/port, dst_addr/port). > > So, a reasonable thing to do: > if (key_info) > return -EEXIST. You're right, making the user delete keys explicitly is better. -- Regards, Leonard
On 8/11/21 2:29 AM, Leonard Crestez wrote: > On 8/10/21 11:41 PM, Dmitry Safonov wrote: >> Hi Leonard, >> >> On Tue, 10 Aug 2021 at 02:50, Leonard Crestez <cdleonard@gmail.com> >> wrote: >> [..] >>> +/* Representation of a Master Key Tuple as per RFC5925 */ >>> +struct tcp_authopt_key_info { >>> + struct hlist_node node; >>> + /* Local identifier */ >>> + u32 local_id; >> >> There is no local_id in RFC5925, what's that? >> An MKT is identified by (send_id, recv_id), together with >> (src_addr/src_port, dst_addr/dst_port). >> Why introducing something new to already complicated RFC? > > It was there to simplify user interface and initial implementation. > > But it seems that BGP listeners already needs to support multiple > keychains for different peers so identifying the key by (send_id, > recv_id, binding) is easier for userspace to work with. Otherwise they > need to create their own local_id mapping internally. > any proposed simplification needs to be well explained and how it relates to the RFC spec.
On 8/11/21 9:29 AM, Leonard Crestez wrote: > On 8/10/21 11:41 PM, Dmitry Safonov wrote: [..] >>> + u32 flags; >>> + /* Wire identifiers */ >>> + u8 send_id, recv_id; >>> + u8 alg_id; >>> + u8 keylen; >>> + u8 key[TCP_AUTHOPT_MAXKEYLEN]; >>> + struct rcu_head rcu; >> >> This is unaligned and will add padding. > > Not clear padding matters. Moving rcu_head higher would avoid it, is > that what you're suggesting. Yes. >> I wonder if it's also worth saving some bytes by something like >> struct tcp_ao_key *key; >> >> With >> struct tcp_ao_key { >> u8 keylen; >> u8 key[0]; >> }; >> >> Hmm? > > This increases memory management complexity for very minor gains. Very > few tcp_authopt_key will ever be created. The change doesn't seem to be big, like: --- a/net/ipv4/tcp_authopt.c +++ b/net/ipv4/tcp_authopt.c @@ -422,8 +422,16 @@ int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsig> key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id); if (key_info) tcp_authopt_key_del(sk, info, key_info); + + key = sock_kmalloc(sk, sizeof(*key) + opt.keylen, GFP_KERNEL | __GFP_ZERO); + if (!key) { + tcp_authopt_alg_release(alg); + return -ENOMEM; + } + key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | __GFP_ZERO); if (!key_info) { + sock_kfree_s(sk, key, sizeof(*key) + opt.keylen); tcp_authopt_alg_release(alg); return -ENOMEM; } I don't know, probably it'll be enough for every user to limit their keys by length of 80, but if one day it won't be enough - this ABI will be painful to extend. [..] >>> +#define TCP_AUTHOPT 38 /* TCP Authentication >>> Option (RFC2385) */ >>> +#define TCP_AUTHOPT_KEY 39 /* TCP Authentication >>> Option update key (RFC2385) */ >> >> RFC2385 is md5 one. >> Also, should there be TCP_AUTHOPT_ADD_KEY, TCP_AUTHOPT_DELTE_KEY >> instead of using flags inside setsocketopt()? (no hard feelings) > > Fixed RFC reference. > > TCP_AUTHOPT_DELETE_KEY could be clearer, I just wanted to avoid bloating > the sockopt space. But there's not any scarcity. > > For reference tcp_md5 handles key deletion based on keylen == 0. This > seems wrong to me, an empty key is in fact valid though not realistic. > > If local_id is removed in favor of "full match on id and binding" then > the delete sockopt would still need most of a full struct > tcp_authopt_key anyway. Sounds like a plan. [..]>> I'm not sure what's the use of enum here, probably: > #define >> TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED BIT(2) > > This is an enum because it looks nice in kernel-doc. I couldn't find a > way to attach docs to a macro and include it somewhere else. Yeah, ok, seems like good justification. > BTW, the enum gains more members later. > > As for BIT() it doesn't see to be allowed in uapi and there were recent > changes removing such usage. Ok, I just saw it's still used in include/uapi, but not aware of the removal. > >> [..] >>> +struct tcp_authopt_key { >>> + /** @flags: Combination of &enum tcp_authopt_key_flag */ >>> + __u32 flags; >>> + /** @local_id: Local identifier */ >>> + __u32 local_id; >>> + /** @send_id: keyid value for send */ >>> + __u8 send_id; >>> + /** @recv_id: keyid value for receive */ >>> + __u8 recv_id; >>> + /** @alg: One of &enum tcp_authopt_alg */ >>> + __u8 alg; >>> + /** @keylen: Length of the key buffer */ >>> + __u8 keylen; >>> + /** @key: Secret key */ >>> + __u8 key[TCP_AUTHOPT_MAXKEYLEN]; >>> + /** >>> + * @addr: Key is only valid for this address >>> + * >>> + * Ignored unless TCP_AUTHOPT_KEY_ADDR_BIND flag is set >>> + */ >>> + struct __kernel_sockaddr_storage addr; >>> +}; >> >> It'll be an ABI if this is accepted. As it is - it can't support >> RFC5925 fully. >> Extending syscall ABI is painful. I think, even the initial ABI *must* >> support >> all possible features of the RFC. >> In other words, there must be src_addr, src_port, dst_addr and dst_port. >> I can see from docs you've written you don't want to support a mix of >> different >> addr/port MKTs, so you can return -EINVAL or -ENOTSUPP for any value >> but zero. >> This will create an ABI that can be later extended to fully support RFC. > > RFC states that MKT connection identifiers can be specified using ranges > and wildcards and the details are up to the implementation. Keys are > *NOT* just bound to a classical TCP 4-tuple. > > * src_addr and src_port is implicit in socket binding. Maybe in theory > src_addr they could apply for a server socket bound to 0.0.0.0:PORT but > userspace can just open more sockets. > * dst_port is not supported by MD5 and I can't think of any useful > usecase. This is either well known (179 for BGP) or auto-allocated. > * tcp_md5 was recently enhanced allow a "prefixlen" for addr and > "l3mdev" ifindex binding. > > This last point shows that the binding features people require can't be > easily predicted in advance so it's better to allow the rules to be > extended. Yeah, I see both changes you mention went on easy way as they reused existing paddings in the ABI structures. Ok, if you don't want to reserve src_addr/src_port/dst_addr/dst_port, than how about reserving some space for those instead? >> The same is about key: I don't think you need to define/use >> tcp_authopt_alg. >> Just use algo name - that way TCP-AO will automatically be able to use >> any algo supported by crypto engine. >> See how xfrm does it, e.g.: >> struct xfrm_algo_auth { >> char alg_name[64]; >> unsigned int alg_key_len; /* in bits */ >> unsigned int alg_trunc_len; /* in bits */ >> char alg_key[0]; >> }; >> >> So you can let a user chose maclen instead of hard-coding it. >> Much more extendable than what you propose. > > This complicates ABI and implementation with features that are not > needed. I'd much rather only expose an enum of real-world tcp-ao > algorithms. I see it exactly the opposite way: a new enum unnecessary complicates ABI, instead of passing alg_name[] to crypto engine. No need to add any support in tcp-ao as the algorithms are already provided by kernel. That is how it transparently works for ipsec, why not for tcp-ao? > >> [..] >>> +#ifdef CONFIG_TCP_AUTHOPT >>> + case TCP_AUTHOPT: { >>> + struct tcp_authopt info; >>> + >>> + if (get_user(len, optlen)) >>> + return -EFAULT; >>> + >>> + lock_sock(sk); >>> + tcp_get_authopt_val(sk, &info); >>> + release_sock(sk); >>> + >>> + len = min_t(unsigned int, len, sizeof(info)); >>> + if (put_user(len, optlen)) >>> + return -EFAULT; >>> + if (copy_to_user(optval, &info, len)) >>> + return -EFAULT; >>> + return 0; >>> + } >> >> I'm pretty sure it's not a good choice to write partly tcp_authopt. >> And a user has no way to check what's the correct len on this kernel. >> Instead of len = min_t(unsigned int, len, sizeof(info)), it should be >> if (len != sizeof(info)) >> return -EINVAL; > > Purpose is to allow sockopts to grow as md5 has grown. md5 has not grown. See above. Another issue with your approach + /* If userspace optlen is too short fill the rest with zeros */ + if (optlen > sizeof(opt)) + return -EINVAL; + memset(&opt, 0, sizeof(opt)); + if (copy_from_sockptr(&opt, optval, optlen)) + return -EFAULT; is that userspace compiled with updated/grew structure will fail on older kernel. So, no extension without breaking something is possible. Which also reminds me that currently you don't validate (opt.flags) for unknown by kernel flags. Extending syscalls is impossible without breaking userspace if ABI is not designed with extensibility in mind. That was quite a big problem, and still is. Please, read this carefully: https://lwn.net/Articles/830666/ That is why I'm suggesting you all those changes that will be harder to fix when/if your patches get accepted. As an example how it should work see in copy_clone_args_from_user(). [..] Thanks, Dmitry
On 8/11/21 8:31 AM, Dmitry Safonov wrote: > On 8/11/21 9:29 AM, Leonard Crestez wrote: >> On 8/10/21 11:41 PM, Dmitry Safonov wrote: > [..] >>>> + u32 flags; >>>> + /* Wire identifiers */ >>>> + u8 send_id, recv_id; >>>> + u8 alg_id; >>>> + u8 keylen; >>>> + u8 key[TCP_AUTHOPT_MAXKEYLEN]; >>>> + struct rcu_head rcu; >>> >>> This is unaligned and will add padding. >> >> Not clear padding matters. Moving rcu_head higher would avoid it, is >> that what you're suggesting. > > Yes. > >>> I wonder if it's also worth saving some bytes by something like >>> struct tcp_ao_key *key; >>> >>> With >>> struct tcp_ao_key { >>> u8 keylen; >>> u8 key[0]; >>> }; >>> >>> Hmm? >> >> This increases memory management complexity for very minor gains. Very >> few tcp_authopt_key will ever be created. > > The change doesn't seem to be big, like: > --- a/net/ipv4/tcp_authopt.c > +++ b/net/ipv4/tcp_authopt.c > @@ -422,8 +422,16 @@ int tcp_set_authopt_key(struct sock *sk, sockptr_t > optval, unsig> > key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id); > if (key_info) > tcp_authopt_key_del(sk, info, key_info); > + > + key = sock_kmalloc(sk, sizeof(*key) + opt.keylen, GFP_KERNEL | > __GFP_ZERO); > + if (!key) { > + tcp_authopt_alg_release(alg); > + return -ENOMEM; > + } > + > key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | > __GFP_ZERO); > if (!key_info) { > + sock_kfree_s(sk, key, sizeof(*key) + opt.keylen); > tcp_authopt_alg_release(alg); > return -ENOMEM; > } > > I don't know, probably it'll be enough for every user to limit their > keys by length of 80, but if one day it won't be enough - this ABI will > be painful to extend. > > [..] >>>> +#define TCP_AUTHOPT 38 /* TCP Authentication >>>> Option (RFC2385) */ >>>> +#define TCP_AUTHOPT_KEY 39 /* TCP Authentication >>>> Option update key (RFC2385) */ >>> >>> RFC2385 is md5 one. >>> Also, should there be TCP_AUTHOPT_ADD_KEY, TCP_AUTHOPT_DELTE_KEY >>> instead of using flags inside setsocketopt()? (no hard feelings) >> >> Fixed RFC reference. >> >> TCP_AUTHOPT_DELETE_KEY could be clearer, I just wanted to avoid bloating >> the sockopt space. But there's not any scarcity. >> >> For reference tcp_md5 handles key deletion based on keylen == 0. This >> seems wrong to me, an empty key is in fact valid though not realistic. >> >> If local_id is removed in favor of "full match on id and binding" then >> the delete sockopt would still need most of a full struct >> tcp_authopt_key anyway. > > Sounds like a plan. > > [..]>> I'm not sure what's the use of enum here, probably: > #define >>> TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED BIT(2) >> >> This is an enum because it looks nice in kernel-doc. I couldn't find a >> way to attach docs to a macro and include it somewhere else. > > Yeah, ok, seems like good justification. > >> BTW, the enum gains more members later. >> >> As for BIT() it doesn't see to be allowed in uapi and there were recent >> changes removing such usage. > > Ok, I just saw it's still used in include/uapi, but not aware of the > removal. > >> >>> [..] >>>> +struct tcp_authopt_key { >>>> + /** @flags: Combination of &enum tcp_authopt_key_flag */ >>>> + __u32 flags; >>>> + /** @local_id: Local identifier */ >>>> + __u32 local_id; >>>> + /** @send_id: keyid value for send */ >>>> + __u8 send_id; >>>> + /** @recv_id: keyid value for receive */ >>>> + __u8 recv_id; >>>> + /** @alg: One of &enum tcp_authopt_alg */ >>>> + __u8 alg; >>>> + /** @keylen: Length of the key buffer */ >>>> + __u8 keylen; >>>> + /** @key: Secret key */ >>>> + __u8 key[TCP_AUTHOPT_MAXKEYLEN]; >>>> + /** >>>> + * @addr: Key is only valid for this address >>>> + * >>>> + * Ignored unless TCP_AUTHOPT_KEY_ADDR_BIND flag is set >>>> + */ >>>> + struct __kernel_sockaddr_storage addr; >>>> +}; >>> >>> It'll be an ABI if this is accepted. As it is - it can't support >>> RFC5925 fully. >>> Extending syscall ABI is painful. I think, even the initial ABI *must* >>> support >>> all possible features of the RFC. >>> In other words, there must be src_addr, src_port, dst_addr and dst_port. >>> I can see from docs you've written you don't want to support a mix of >>> different >>> addr/port MKTs, so you can return -EINVAL or -ENOTSUPP for any value >>> but zero. >>> This will create an ABI that can be later extended to fully support RFC. >> >> RFC states that MKT connection identifiers can be specified using ranges >> and wildcards and the details are up to the implementation. Keys are >> *NOT* just bound to a classical TCP 4-tuple. >> >> * src_addr and src_port is implicit in socket binding. Maybe in theory >> src_addr they could apply for a server socket bound to 0.0.0.0:PORT but >> userspace can just open more sockets. >> * dst_port is not supported by MD5 and I can't think of any useful >> usecase. This is either well known (179 for BGP) or auto-allocated. >> * tcp_md5 was recently enhanced allow a "prefixlen" for addr and >> "l3mdev" ifindex binding. >> >> This last point shows that the binding features people require can't be >> easily predicted in advance so it's better to allow the rules to be >> extended. > > Yeah, I see both changes you mention went on easy way as they reused > existing paddings in the ABI structures. > Ok, if you don't want to reserve src_addr/src_port/dst_addr/dst_port, > than how about reserving some space for those instead? > >>> The same is about key: I don't think you need to define/use >>> tcp_authopt_alg. >>> Just use algo name - that way TCP-AO will automatically be able to use >>> any algo supported by crypto engine. >>> See how xfrm does it, e.g.: >>> struct xfrm_algo_auth { >>> char alg_name[64]; >>> unsigned int alg_key_len; /* in bits */ >>> unsigned int alg_trunc_len; /* in bits */ >>> char alg_key[0]; >>> }; >>> >>> So you can let a user chose maclen instead of hard-coding it. >>> Much more extendable than what you propose. >> >> This complicates ABI and implementation with features that are not >> needed. I'd much rather only expose an enum of real-world tcp-ao >> algorithms. > > I see it exactly the opposite way: a new enum unnecessary complicates > ABI, instead of passing alg_name[] to crypto engine. No need to add any > support in tcp-ao as the algorithms are already provided by kernel. > That is how it transparently works for ipsec, why not for tcp-ao? > >> >>> [..] >>>> +#ifdef CONFIG_TCP_AUTHOPT >>>> + case TCP_AUTHOPT: { >>>> + struct tcp_authopt info; >>>> + >>>> + if (get_user(len, optlen)) >>>> + return -EFAULT; >>>> + >>>> + lock_sock(sk); >>>> + tcp_get_authopt_val(sk, &info); >>>> + release_sock(sk); >>>> + >>>> + len = min_t(unsigned int, len, sizeof(info)); >>>> + if (put_user(len, optlen)) >>>> + return -EFAULT; >>>> + if (copy_to_user(optval, &info, len)) >>>> + return -EFAULT; >>>> + return 0; >>>> + } >>> >>> I'm pretty sure it's not a good choice to write partly tcp_authopt. >>> And a user has no way to check what's the correct len on this kernel. >>> Instead of len = min_t(unsigned int, len, sizeof(info)), it should be >>> if (len != sizeof(info)) >>> return -EINVAL; >> >> Purpose is to allow sockopts to grow as md5 has grown. > > md5 has not grown. See above. MD5 uapi has - e.g., 8917a777be3ba and 6b102db50cdde. We want similar capabilities for growth with this API. > > Another issue with your approach > > + /* If userspace optlen is too short fill the rest with zeros */ > + if (optlen > sizeof(opt)) > + return -EINVAL; > + memset(&opt, 0, sizeof(opt)); > + if (copy_from_sockptr(&opt, optval, optlen)) > + return -EFAULT; > > is that userspace compiled with updated/grew structure will fail on > older kernel. So, no extension without breaking something is possible. > Which also reminds me that currently you don't validate (opt.flags) for > unknown by kernel flags. > > Extending syscalls is impossible without breaking userspace if ABI is > not designed with extensibility in mind. That was quite a big problem, > and still is. Please, read this carefully: > https://lwn.net/Articles/830666/ > > That is why I'm suggesting you all those changes that will be harder to > fix when/if your patches get accepted. > As an example how it should work see in copy_clone_args_from_user(). > Look at how TCP_ZEROCOPY_RECEIVE has grown over releases as an example of how to properly handle this.
On 11.08.2021 17:31, Dmitry Safonov wrote: > On 8/11/21 9:29 AM, Leonard Crestez wrote: >> On 8/10/21 11:41 PM, Dmitry Safonov wrote: >>> I wonder if it's also worth saving some bytes by something like >>> struct tcp_ao_key *key; >>> >>> With >>> struct tcp_ao_key { >>> u8 keylen; >>> u8 key[0]; >>> }; >>> >>> Hmm? >> >> This increases memory management complexity for very minor gains. Very >> few tcp_authopt_key will ever be created. > > The change doesn't seem to be big, like: > --- a/net/ipv4/tcp_authopt.c > +++ b/net/ipv4/tcp_authopt.c > @@ -422,8 +422,16 @@ int tcp_set_authopt_key(struct sock *sk, sockptr_t > optval, unsig> > key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id); > if (key_info) > tcp_authopt_key_del(sk, info, key_info); > + > + key = sock_kmalloc(sk, sizeof(*key) + opt.keylen, GFP_KERNEL | > __GFP_ZERO); > + if (!key) { > + tcp_authopt_alg_release(alg); > + return -ENOMEM; > + } > + > key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | > __GFP_ZERO); > if (!key_info) { > + sock_kfree_s(sk, key, sizeof(*key) + opt.keylen); > tcp_authopt_alg_release(alg); > return -ENOMEM; > } > > I don't know, probably it'll be enough for every user to limit their > keys by length of 80, but if one day it won't be enough - this ABI will > be painful to extend. struct tcp_authopt_key also needs to be modified and a separate copy_from_user is required. It's not very complicated but not very useful either. >>>> +struct tcp_authopt_key { >>>> + /** @flags: Combination of &enum tcp_authopt_key_flag */ >>>> + __u32 flags; >>>> + /** @local_id: Local identifier */ >>>> + __u32 local_id; >>>> + /** @send_id: keyid value for send */ >>>> + __u8 send_id; >>>> + /** @recv_id: keyid value for receive */ >>>> + __u8 recv_id; >>>> + /** @alg: One of &enum tcp_authopt_alg */ >>>> + __u8 alg; >>>> + /** @keylen: Length of the key buffer */ >>>> + __u8 keylen; >>>> + /** @key: Secret key */ >>>> + __u8 key[TCP_AUTHOPT_MAXKEYLEN]; >>>> + /** >>>> + * @addr: Key is only valid for this address >>>> + * >>>> + * Ignored unless TCP_AUTHOPT_KEY_ADDR_BIND flag is set >>>> + */ >>>> + struct __kernel_sockaddr_storage addr; >>>> +}; >>> >>> It'll be an ABI if this is accepted. As it is - it can't support >>> RFC5925 fully. >>> Extending syscall ABI is painful. I think, even the initial ABI *must* >>> support >>> all possible features of the RFC. >>> In other words, there must be src_addr, src_port, dst_addr and dst_port. >>> I can see from docs you've written you don't want to support a mix of >>> different >>> addr/port MKTs, so you can return -EINVAL or -ENOTSUPP for any value >>> but zero. >>> This will create an ABI that can be later extended to fully support RFC. >> >> RFC states that MKT connection identifiers can be specified using ranges >> and wildcards and the details are up to the implementation. Keys are >> *NOT* just bound to a classical TCP 4-tuple. >> >> * src_addr and src_port is implicit in socket binding. Maybe in theory >> src_addr they could apply for a server socket bound to 0.0.0.0:PORT but >> userspace can just open more sockets. >> * dst_port is not supported by MD5 and I can't think of any useful >> usecase. This is either well known (179 for BGP) or auto-allocated. >> * tcp_md5 was recently enhanced allow a "prefixlen" for addr and >> "l3mdev" ifindex binding. >> >> This last point shows that the binding features people require can't be >> easily predicted in advance so it's better to allow the rules to be >> extended. > > Yeah, I see both changes you mention went on easy way as they reused > existing paddings in the ABI structures. > Ok, if you don't want to reserve src_addr/src_port/dst_addr/dst_port, > than how about reserving some space for those instead? My idea was that each additional binding featurs can be added as a new bit flag in tcp_authopt_key_flag and the structure extended. Older applications won't pass the flag and kernel will silently accept the shorter optval and you get compatibility. As far as I can tell MD5 supports binding in 3 ways: 1) By dst ip address 2) By dst ip address and prefixlen 3) By ifindex for vrfs Current version of tcp_authopt only supports (1) but I think adding the other methods isn't actually difficult at all. I'd rather not guess at future features by adding unused fields. >>> The same is about key: I don't think you need to define/use >>> tcp_authopt_alg. >>> Just use algo name - that way TCP-AO will automatically be able to use >>> any algo supported by crypto engine. >>> See how xfrm does it, e.g.: >>> struct xfrm_algo_auth { >>> char alg_name[64]; >>> unsigned int alg_key_len; /* in bits */ >>> unsigned int alg_trunc_len; /* in bits */ >>> char alg_key[0]; >>> }; >>> >>> So you can let a user chose maclen instead of hard-coding it. >>> Much more extendable than what you propose. >> >> This complicates ABI and implementation with features that are not >> needed. I'd much rather only expose an enum of real-world tcp-ao >> algorithms. > > I see it exactly the opposite way: a new enum unnecessary complicates > ABI, instead of passing alg_name[] to crypto engine. No need to add any > support in tcp-ao as the algorithms are already provided by kernel. > That is how it transparently works for ipsec, why not for tcp-ao? The TCP Authentication Option standard has been out there for many many years now and alternative algorithms are not widely used. I think cisco has a third algorithm? What you're asking for is a large extension of the IETF standard. If you look at the rest of this series I had a lot of trouble with crypto tfm allocation context so I had to create per-cpu pool, similar to tcp-md5. Should I potentially create a pool for each alg known to crypto-api? Letting use control algorithms and traffickey and mac lengths creates new potential avenues for privilege escalation that need to be checked. As much as possible I would like to avoid exposing the linux crypto api through TCP sockopts. I was also thinking of having a non-namespaced sysctl to disable tcp_authopt by default for security reasons. Unless you're running a router you should never let userspace touch these options. >>> [..] >>>> +#ifdef CONFIG_TCP_AUTHOPT >>>> + case TCP_AUTHOPT: { >>>> + struct tcp_authopt info; >>>> + >>>> + if (get_user(len, optlen)) >>>> + return -EFAULT; >>>> + >>>> + lock_sock(sk); >>>> + tcp_get_authopt_val(sk, &info); >>>> + release_sock(sk); >>>> + >>>> + len = min_t(unsigned int, len, sizeof(info)); >>>> + if (put_user(len, optlen)) >>>> + return -EFAULT; >>>> + if (copy_to_user(optval, &info, len)) >>>> + return -EFAULT; >>>> + return 0; >>>> + } >>> >>> I'm pretty sure it's not a good choice to write partly tcp_authopt. >>> And a user has no way to check what's the correct len on this kernel. >>> Instead of len = min_t(unsigned int, len, sizeof(info)), it should be >>> if (len != sizeof(info)) >>> return -EINVAL; >> >> Purpose is to allow sockopts to grow as md5 has grown. > > md5 has not grown. See above. > > Another issue with your approach > > + /* If userspace optlen is too short fill the rest with zeros */ > + if (optlen > sizeof(opt)) > + return -EINVAL; > + memset(&opt, 0, sizeof(opt)); > + if (copy_from_sockptr(&opt, optval, optlen)) > + return -EFAULT; > > is that userspace compiled with updated/grew structure will fail on > older kernel. So, no extension without breaking something is possible. Userspace that needs new features and also compatibility with older kernels could check something like uname. I think this is already a problem with md5: passing TCP_MD5SIG_FLAG_PREFIX on an old kernel simply won't take effect and that's fine. The bigger concern is to ensure that old binaries work without changes. > Which also reminds me that currently you don't validate (opt.flags) for > unknown by kernel flags. Not sure what you mean, it is explicitly only known flags that are copied from userspace. I can make setsockopt return an error on unknown flags, this will make new apps fail more explicitly on old kernels. > Extending syscalls is impossible without breaking userspace if ABI is > not designed with extensibility in mind. That was quite a big problem, > and still is. Please, read this carefully: > https://lwn.net/Articles/830666/ > > That is why I'm suggesting you all those changes that will be harder to > fix when/if your patches get accepted. Both of the sockopt structs have a "flags" field and structure expansion will be accompanied by new flags. Is this not sufficient for compatibility?
On 11.08.2021 16:42, David Ahern wrote: > On 8/11/21 2:29 AM, Leonard Crestez wrote: >> On 8/10/21 11:41 PM, Dmitry Safonov wrote: >>> Hi Leonard, >>> >>> On Tue, 10 Aug 2021 at 02:50, Leonard Crestez <cdleonard@gmail.com> >>> wrote: >>> [..] >>>> +/* Representation of a Master Key Tuple as per RFC5925 */ >>>> +struct tcp_authopt_key_info { >>>> + struct hlist_node node; >>>> + /* Local identifier */ >>>> + u32 local_id; >>> >>> There is no local_id in RFC5925, what's that? >>> An MKT is identified by (send_id, recv_id), together with >>> (src_addr/src_port, dst_addr/dst_port). >>> Why introducing something new to already complicated RFC? >> >> It was there to simplify user interface and initial implementation. >> >> But it seems that BGP listeners already needs to support multiple >> keychains for different peers so identifying the key by (send_id, >> recv_id, binding) is easier for userspace to work with. Otherwise they >> need to create their own local_id mapping internally. >> > > any proposed simplification needs to be well explained and how it > relates to the RFC spec. The local_id only exists between userspace and kernel so it's not really covered by the RFC. There are objections to this and it seems to be unhelpful for userspace zo I will replace it with match by binding. BTW: another somewhat dubious simplification is that I offloaded the RFC requirement to never add overlapping keys to userspace. So if userspace adds keys with same recvid that match the same TCP 4-tuple then connections will just start failing. It's arguably fine to allow userspace misconfiguration to cause failures. -- Regards, Leonard
Hi David, On 8/11/21 6:15 PM, David Ahern wrote: > On 8/11/21 8:31 AM, Dmitry Safonov wrote: >> On 8/11/21 9:29 AM, Leonard Crestez wrote: >>> On 8/10/21 11:41 PM, Dmitry Safonov wrote: [..] >>>> I'm pretty sure it's not a good choice to write partly tcp_authopt. >>>> And a user has no way to check what's the correct len on this kernel. >>>> Instead of len = min_t(unsigned int, len, sizeof(info)), it should be >>>> if (len != sizeof(info)) >>>> return -EINVAL; >>> >>> Purpose is to allow sockopts to grow as md5 has grown. >> >> md5 has not grown. See above. > > MD5 uapi has - e.g., 8917a777be3ba and 6b102db50cdde. We want similar > capabilities for growth with this API. So, you mean adding a new setsockopt when the struct has to be extended? Like TCP_AUTHOPT_EXT? It can work, but sounds like adding a new syscall every time the old one can't be extended. I can see that with current limitations on TCP-AO RFC the ABI in these patches will have to be extended. The second commit started using new cmd.tcpm_flags, where unknown flags are still at this moment silently ignored by the kernel. So 6b102db50cdd could have introduced a regression in userspace. By luck and by reason that md5 isn't probably frequently used it didn't. Not nice at all example for newer APIs. >> Another issue with your approach >> >> + /* If userspace optlen is too short fill the rest with zeros */ >> + if (optlen > sizeof(opt)) >> + return -EINVAL; >> + memset(&opt, 0, sizeof(opt)); >> + if (copy_from_sockptr(&opt, optval, optlen)) >> + return -EFAULT; >> >> is that userspace compiled with updated/grew structure will fail on >> older kernel. So, no extension without breaking something is possible. >> Which also reminds me that currently you don't validate (opt.flags) for >> unknown by kernel flags. >> >> Extending syscalls is impossible without breaking userspace if ABI is >> not designed with extensibility in mind. That was quite a big problem, >> and still is. Please, read this carefully: >> https://lwn.net/Articles/830666/ >> >> That is why I'm suggesting you all those changes that will be harder to >> fix when/if your patches get accepted. >> As an example how it should work see in copy_clone_args_from_user(). >> > > Look at how TCP_ZEROCOPY_RECEIVE has grown over releases as an example > of how to properly handle this. Exactly. : switch (len) { : case offsetofend(...) : case offsetofend(...) And than also: : if (unlikely(len > sizeof(zc))) { : err = check_zeroed_user(optval + sizeof(zc), : len - sizeof(zc)); Does it sound similar to what I've written in my ABI review? And what the LWN article has in it. Please, look again at the patch I replied to. Thanks, Dmitry
On 8/11/21 2:12 PM, Dmitry Safonov wrote: > Hi David, > > On 8/11/21 6:15 PM, David Ahern wrote: >> On 8/11/21 8:31 AM, Dmitry Safonov wrote: >>> On 8/11/21 9:29 AM, Leonard Crestez wrote: >>>> On 8/10/21 11:41 PM, Dmitry Safonov wrote: > [..] >>>>> I'm pretty sure it's not a good choice to write partly tcp_authopt. >>>>> And a user has no way to check what's the correct len on this kernel. >>>>> Instead of len = min_t(unsigned int, len, sizeof(info)), it should be >>>>> if (len != sizeof(info)) >>>>> return -EINVAL; >>>> >>>> Purpose is to allow sockopts to grow as md5 has grown. >>> >>> md5 has not grown. See above. >> >> MD5 uapi has - e.g., 8917a777be3ba and 6b102db50cdde. We want similar >> capabilities for growth with this API. > > So, you mean adding a new setsockopt when the struct has to be extended? > Like TCP_AUTHOPT_EXT? uh, no. That was needed because of failures with the original implementation wrt checking that all unused bits are 0. If checking is not done from day 1, that field can never be used in the future. My point here was only that MD5 uapi was extended. My second point is more relevant to Leonard as a very recent example of how to build an extendable struct. >> >> Look at how TCP_ZEROCOPY_RECEIVE has grown over releases as an example >> of how to properly handle this. > > Exactly. > > : switch (len) { > : case offsetofend(...) > : case offsetofend(...) > > And than also: > : if (unlikely(len > sizeof(zc))) { > : err = check_zeroed_user(optval + sizeof(zc), > : len - sizeof(zc)); > > Does it sound similar to what I've written in my ABI review? > And what the LWN article has in it. > Please, look again at the patch I replied to. > > Thanks, > Dmitry >
On 8/11/21 8:11 PM, Leonard Crestez wrote: > On 11.08.2021 16:42, David Ahern wrote: [..] >> >> any proposed simplification needs to be well explained and how it >> relates to the RFC spec. > > The local_id only exists between userspace and kernel so it's not really > covered by the RFC. > > There are objections to this and it seems to be unhelpful for userspace > zo I will replace it with match by binding. > > BTW: another somewhat dubious simplification is that I offloaded the RFC > requirement to never add overlapping keys to userspace. So if userspace > adds keys with same recvid that match the same TCP 4-tuple then > connections will just start failing. > > It's arguably fine to allow userspace misconfiguration to cause failures. I think it's fine. But worth documenting. Also, keep in mind that someone in userspace with his funny ideas might start relying on such behavior in future. Thanks, Dmitry
On 8/11/21 1:11 PM, Leonard Crestez wrote: > On 11.08.2021 16:42, David Ahern wrote: >> On 8/11/21 2:29 AM, Leonard Crestez wrote: >>> On 8/10/21 11:41 PM, Dmitry Safonov wrote: >>>> Hi Leonard, >>>> >>>> On Tue, 10 Aug 2021 at 02:50, Leonard Crestez <cdleonard@gmail.com> >>>> wrote: >>>> [..] >>>>> +/* Representation of a Master Key Tuple as per RFC5925 */ >>>>> +struct tcp_authopt_key_info { >>>>> + struct hlist_node node; >>>>> + /* Local identifier */ >>>>> + u32 local_id; >>>> >>>> There is no local_id in RFC5925, what's that? >>>> An MKT is identified by (send_id, recv_id), together with >>>> (src_addr/src_port, dst_addr/dst_port). >>>> Why introducing something new to already complicated RFC? >>> >>> It was there to simplify user interface and initial implementation. >>> >>> But it seems that BGP listeners already needs to support multiple >>> keychains for different peers so identifying the key by (send_id, >>> recv_id, binding) is easier for userspace to work with. Otherwise they >>> need to create their own local_id mapping internally. >>> >> >> any proposed simplification needs to be well explained and how it >> relates to the RFC spec. > > The local_id only exists between userspace and kernel so it's not really > covered by the RFC. My point is that you need to document the uapi (however it ends up) and how it is used to achieve the intent of the RFC. > > There are objections to this and it seems to be unhelpful for userspace > zo I will replace it with match by binding. > > BTW: another somewhat dubious simplification is that I offloaded the RFC > requirement to never add overlapping keys to userspace. So if userspace > adds keys with same recvid that match the same TCP 4-tuple then > connections will just start failing. > > It's arguably fine to allow userspace misconfiguration to cause failures. > > -- > Regards, > Leonard
On 8/11/21 11:29 AM, Leonard Crestez wrote: > On 8/10/21 11:41 PM, Dmitry Safonov wrote: >> On Tue, 10 Aug 2021 at 02:50, Leonard Crestez <cdleonard@gmail.com> >>> + /* If an old value exists for same local_id it is deleted */ >>> + key_info = __tcp_authopt_key_info_lookup(sk, info, >>> opt.local_id); >>> + if (key_info) >>> + tcp_authopt_key_del(sk, info, key_info); >>> + key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | >>> __GFP_ZERO); >>> + if (!key_info) >>> + return -ENOMEM; >> >> 1. You don't need sock_kmalloc() together with tcp_authopt_key_del(). >> It just frees the memory and allocates it back straight away - no >> sense in doing that. > > The list is scanned in multiple places in later commits using nothing > but an rcu_read_lock, this means that keys can't be updated in-place. > >> 2. I think RFC says you must not allow a user to change an existing key: >>> MKT parameters are not changed. Instead, new MKTs can be installed, >>> and a connection >>> can change which MKT it uses. >> >> IIUC, it means that one can't just change an existing MKT, but one can >> remove and later >> add MKT with the same (send_id, recv_id, src_addr/port, dst_addr/port). >> >> So, a reasonable thing to do: >> if (key_info) >> return -EEXIST. > > You're right, making the user delete keys explicitly is better. On a second thought this might be required to mark keys as "send-only" and "recv-only" atomically. Separately from RFC5925 some vendors implement a "keychain" model based on RFC8177 where each key has a distinct "accept-lifetime" and a "send-lifetime". This could be implemented by adding flags "expired_for_send" and "expired_for_recv" but requires the ability to set an expiration mark without the key ever being deleted. -- Regards, Leonard
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 48d8a363319e..cfddfc720b00 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -140,10 +140,12 @@ struct tcp_request_sock { static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) { return (struct tcp_request_sock *)req; } +struct tcp_authopt_info; + struct tcp_sock { /* inet_connection_sock has to be the first member of tcp_sock */ struct inet_connection_sock inet_conn; u16 tcp_header_len; /* Bytes of tcp header to send */ u16 gso_segs; /* Max number of segs per GSO packet */ @@ -403,10 +405,14 @@ struct tcp_sock { /* TCP MD5 Signature Option information */ struct tcp_md5sig_info __rcu *md5sig_info; #endif +#ifdef CONFIG_TCP_AUTHOPT + struct tcp_authopt_info __rcu *authopt_info; +#endif + /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; /* fastopen_rsk points to request_sock that resulted in this big * socket. Used to retransmit SYNACKs etc. */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 3166dc15d7d6..bb76554e8fe5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -182,10 +182,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_WINDOW 3 /* Window scaling */ #define TCPOPT_SACK_PERM 4 /* SACK Permitted */ #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_AUTHOPT 29 /* Auth Option (RFC5925) */ #define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */ #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP * experimental options. See draft-ietf-tcpm-experimental-options-00.txt diff --git a/include/net/tcp_authopt.h b/include/net/tcp_authopt.h new file mode 100644 index 000000000000..458d108bb7a8 --- /dev/null +++ b/include/net/tcp_authopt.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_TCP_AUTHOPT_H +#define _LINUX_TCP_AUTHOPT_H + +#include <uapi/linux/tcp.h> + +/* Representation of a Master Key Tuple as per RFC5925 */ +struct tcp_authopt_key_info { + struct hlist_node node; + /* Local identifier */ + u32 local_id; + u32 flags; + /* Wire identifiers */ + u8 send_id, recv_id; + u8 alg_id; + u8 keylen; + u8 key[TCP_AUTHOPT_MAXKEYLEN]; + struct rcu_head rcu; + struct sockaddr_storage addr; +}; + +/* Per-socket information regarding tcp_authopt */ +struct tcp_authopt_info { + /* List of tcp_authopt_key_info */ + struct hlist_head head; + u32 flags; + u32 src_isn; + u32 dst_isn; + struct rcu_head rcu; +}; + +#ifdef CONFIG_TCP_AUTHOPT +void tcp_authopt_clear(struct sock *sk); +int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen); +int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *key); +int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen); +#else +static inline int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + return -ENOPROTOOPT; +} +static inline int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *key) +{ + return -ENOPROTOOPT; +} +static inline void tcp_authopt_clear(struct sock *sk) +{ +} +static inline int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + return -ENOPROTOOPT; +} +#endif + +#endif /* _LINUX_TCP_AUTHOPT_H */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 8fc09e8638b3..bc47664156eb 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -126,10 +126,12 @@ enum { #define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ #define TCP_CM_INQ TCP_INQ #define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ +#define TCP_AUTHOPT 38 /* TCP Authentication Option (RFC2385) */ +#define TCP_AUTHOPT_KEY 39 /* TCP Authentication Option update key (RFC2385) */ #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 #define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ @@ -340,10 +342,80 @@ struct tcp_diag_md5sig { __u16 tcpm_keylen; __be32 tcpm_addr[4]; __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; }; +/** + * enum tcp_authopt_flag - flags for `tcp_authopt.flags` + */ +enum tcp_authopt_flag { + /** + * @TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED: + * Configure behavior of segments with TCP-AO coming from hosts for which no + * key is configured. The default recommended by RFC is to silently accept + * such connections. + */ + TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED = (1 << 2), +}; + +/** + * struct tcp_authopt - Per-socket options related to TCP Authentication Option + */ +struct tcp_authopt { + /** @flags: Combination of &enum tcp_authopt_flag */ + __u32 flags; +}; + +/** + * enum tcp_authopt_key_flag - flags for `tcp_authopt.flags` + * + * @TCP_AUTHOPT_KEY_DEL: Delete the key by local_id and ignore all other fields. + * @TCP_AUTHOPT_KEY_EXCLUDE_OPTS: Exclude TCP options from signature. + * @TCP_AUTHOPT_KEY_ADDR_BIND: Key only valid for `tcp_authopt.addr` + */ +enum tcp_authopt_key_flag { + TCP_AUTHOPT_KEY_DEL = (1 << 0), + TCP_AUTHOPT_KEY_EXCLUDE_OPTS = (1 << 1), + TCP_AUTHOPT_KEY_ADDR_BIND = (1 << 2), +}; + +/* for TCP_AUTHOPT_KEY socket option */ +#define TCP_AUTHOPT_MAXKEYLEN 80 + +enum tcp_authopt_alg { + TCP_AUTHOPT_ALG_HMAC_SHA_1_96 = 1, + TCP_AUTHOPT_ALG_AES_128_CMAC_96 = 2, +}; + +/** + * struct tcp_authopt_key - TCP Authentication KEY + * + * Each key is identified by a non-zero local_id which is managed by the application. + */ +struct tcp_authopt_key { + /** @flags: Combination of &enum tcp_authopt_key_flag */ + __u32 flags; + /** @local_id: Local identifier */ + __u32 local_id; + /** @send_id: keyid value for send */ + __u8 send_id; + /** @recv_id: keyid value for receive */ + __u8 recv_id; + /** @alg: One of &enum tcp_authopt_alg */ + __u8 alg; + /** @keylen: Length of the key buffer */ + __u8 keylen; + /** @key: Secret key */ + __u8 key[TCP_AUTHOPT_MAXKEYLEN]; + /** + * @addr: Key is only valid for this address + * + * Ignored unless TCP_AUTHOPT_KEY_ADDR_BIND flag is set + */ + struct __kernel_sockaddr_storage addr; +}; + /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */ #define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1 struct tcp_zerocopy_receive { __u64 address; /* in: address of mapping */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 87983e70f03f..6459f4ea6f1d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -740,5 +740,19 @@ config TCP_MD5SIG RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) use is to protect BGP sessions between core routers on the Internet. If unsure, say N. + +config TCP_AUTHOPT + bool "TCP: Authentication Option support (RFC5925)" + select CRYPTO + select CRYPTO_SHA1 + select CRYPTO_HMAC + select CRYPTO_AES + select CRYPTO_CMAC + help + RFC5925 specifies a new method of giving protection to TCP sessions. + Its intended use is to protect BGP sessions between core routers + on the Internet. It obsoletes TCP MD5 (RFC2385) but is incompatible. + + If unsure, say N. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index bbdd9c44f14e..d336f32ce177 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -59,10 +59,11 @@ obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o +obj-$(CONFIG_TCP_AUTHOPT) += tcp_authopt.o obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f931def6302e..fd90e80afa2c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -271,10 +271,11 @@ #include <net/icmp.h> #include <net/inet_common.h> #include <net/tcp.h> #include <net/mptcp.h> +#include <net/tcp_authopt.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/sock.h> #include <linux/uaccess.h> @@ -3573,10 +3574,16 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_MD5SIG: case TCP_MD5SIG_EXT: err = tp->af_specific->md5_parse(sk, optname, optval, optlen); break; #endif + case TCP_AUTHOPT: + err = tcp_set_authopt(sk, optval, optlen); + break; + case TCP_AUTHOPT_KEY: + err = tcp_set_authopt_key(sk, optval, optlen); + break; case TCP_USER_TIMEOUT: /* Cap the max time in ms TCP will retry or probe the window * before giving up and aborting (ETIMEDOUT) a connection. */ if (val < 0) @@ -4219,10 +4226,30 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (!err && copy_to_user(optval, &zc, len)) err = -EFAULT; return err; } #endif +#ifdef CONFIG_TCP_AUTHOPT + case TCP_AUTHOPT: { + struct tcp_authopt info; + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + tcp_get_authopt_val(sk, &info); + release_sock(sk); + + len = min_t(unsigned int, len, sizeof(info)); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &info, len)) + return -EFAULT; + return 0; + } +#endif + default: return -ENOPROTOOPT; } if (put_user(len, optlen)) diff --git a/net/ipv4/tcp_authopt.c b/net/ipv4/tcp_authopt.c new file mode 100644 index 000000000000..5fa7bce8891b --- /dev/null +++ b/net/ipv4/tcp_authopt.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/kernel.h> +#include <net/tcp.h> +#include <net/tcp_authopt.h> +#include <crypto/hash.h> +#include <trace/events/tcp.h> + +struct tcp_authopt_key_info *__tcp_authopt_key_info_lookup(const struct sock *sk, + struct tcp_authopt_info *info, + int key_id) +{ + struct tcp_authopt_key_info *key; + + hlist_for_each_entry_rcu(key, &info->head, node, lockdep_sock_is_held(sk)) + if (key->local_id == key_id) + return key; + + return NULL; +} + +static struct tcp_authopt_info *__tcp_authopt_info_get_or_create(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_authopt_info *info; + + info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk)); + if (info) + return info; + + info = kmalloc(sizeof(*info), GFP_KERNEL | __GFP_ZERO); + if (!info) + return ERR_PTR(-ENOMEM); + + sk_nocaps_add(sk, NETIF_F_GSO_MASK); + INIT_HLIST_HEAD(&info->head); + rcu_assign_pointer(tp->authopt_info, info); + + return info; +} + +int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + struct tcp_authopt opt; + struct tcp_authopt_info *info; + + WARN_ON(!lockdep_sock_is_held(sk)); + + /* If userspace optlen is too short fill the rest with zeros */ + if (optlen > sizeof(opt)) + return -EINVAL; + memset(&opt, 0, sizeof(opt)); + if (copy_from_sockptr(&opt, optval, optlen)) + return -EFAULT; + + info = __tcp_authopt_info_get_or_create(sk); + if (IS_ERR(info)) + return PTR_ERR(info); + + info->flags = opt.flags & TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED); + + return 0; +} + +int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *opt) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_authopt_info *info; + + WARN_ON(!lockdep_sock_is_held(sk)); + memset(opt, 0, sizeof(*opt)); + info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk)); + if (!info) + return -EINVAL; + opt->flags = info->flags & TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED; + + return 0; +} + +static void tcp_authopt_key_del(struct sock *sk, + struct tcp_authopt_info *info, + struct tcp_authopt_key_info *key) +{ + hlist_del_rcu(&key->node); + atomic_sub(sizeof(*key), &sk->sk_omem_alloc); + kfree_rcu(key, rcu); +} + +/* free info and keys but don't touch tp->authopt_info */ +void __tcp_authopt_info_free(struct sock *sk, struct tcp_authopt_info *info) +{ + struct hlist_node *n; + struct tcp_authopt_key_info *key; + + hlist_for_each_entry_safe(key, n, &info->head, node) + tcp_authopt_key_del(sk, info, key); + kfree_rcu(info, rcu); +} + +/* free everything and clear tcp_sock.authopt_info to NULL */ +void tcp_authopt_clear(struct sock *sk) +{ + struct tcp_authopt_info *info; + + info = rcu_dereference_protected(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk)); + if (info) { + __tcp_authopt_info_free(sk, info); + tcp_sk(sk)->authopt_info = NULL; + } +} + +int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + struct tcp_authopt_key opt; + struct tcp_authopt_info *info; + struct tcp_authopt_key_info *key_info; + + /* If userspace optlen is too short fill the rest with zeros */ + if (optlen > sizeof(opt)) + return -EINVAL; + memset(&opt, 0, sizeof(opt)); + if (copy_from_sockptr(&opt, optval, optlen)) + return -EFAULT; + + if (opt.keylen > TCP_AUTHOPT_MAXKEYLEN) + return -EINVAL; + + if (opt.local_id == 0) + return -EINVAL; + + /* Delete is a special case: we ignore all fields other than local_id */ + if (opt.flags & TCP_AUTHOPT_KEY_DEL) { + info = rcu_dereference_check(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk)); + if (!info) + return -ENOENT; + key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id); + if (!key_info) + return -ENOENT; + tcp_authopt_key_del(sk, info, key_info); + return 0; + } + + /* Initialize tcp_authopt_info if not already set */ + info = __tcp_authopt_info_get_or_create(sk); + if (IS_ERR(info)) + return PTR_ERR(info); + + /* check key family */ + if (opt.flags & TCP_AUTHOPT_KEY_ADDR_BIND) { + if (sk->sk_family != opt.addr.ss_family) + return -EINVAL; + } + + /* If an old value exists for same local_id it is deleted */ + key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id); + if (key_info) + tcp_authopt_key_del(sk, info, key_info); + key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | __GFP_ZERO); + if (!key_info) + return -ENOMEM; + key_info->local_id = opt.local_id; + key_info->flags = opt.flags & (TCP_AUTHOPT_KEY_EXCLUDE_OPTS | TCP_AUTHOPT_KEY_ADDR_BIND); + key_info->send_id = opt.send_id; + key_info->recv_id = opt.recv_id; + key_info->alg_id = opt.alg; + key_info->keylen = opt.keylen; + memcpy(key_info->key, opt.key, opt.keylen); + memcpy(&key_info->addr, &opt.addr, sizeof(key_info->addr)); + hlist_add_head_rcu(&key_info->node, &info->head); + + return 0; +} diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2e62e0d6373a..1348615c7576 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -60,10 +60,11 @@ #include <net/net_namespace.h> #include <net/icmp.h> #include <net/inet_hashtables.h> #include <net/tcp.h> +#include <net/tcp_authopt.h> #include <net/transp_v6.h> #include <net/ipv6.h> #include <net/inet_common.h> #include <net/timewait_sock.h> #include <net/xfrm.h> @@ -2256,10 +2257,11 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_clear_md5_list(sk); kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu); tp->md5sig_info = NULL; } #endif + tcp_authopt_clear(sk); /* Clean up a referenced TCP bind bucket. */ if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk);
This commit add support to add and remove keys but does not use them further. Similar to tcp md5 a single point to a struct tcp_authopt_info* struct is added to struct tcp_sock in order to avoid increasing memory usage for everybody. The data structures related to tcp_authopt are initialized on setsockopt and only removed on socket close. Signed-off-by: Leonard Crestez <cdleonard@gmail.com> --- include/linux/tcp.h | 6 ++ include/net/tcp.h | 1 + include/net/tcp_authopt.h | 55 ++++++++++++ include/uapi/linux/tcp.h | 72 ++++++++++++++++ net/ipv4/Kconfig | 14 ++++ net/ipv4/Makefile | 1 + net/ipv4/tcp.c | 27 ++++++ net/ipv4/tcp_authopt.c | 172 ++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 2 + 9 files changed, 350 insertions(+) create mode 100644 include/net/tcp_authopt.h create mode 100644 net/ipv4/tcp_authopt.c