Message ID | 20240827120805.13681-13-antonio@openvpn.net (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Introducing OpenVPN Data Channel Offload | expand |
2024-08-27, 14:07:52 +0200, Antonio Quartulli wrote: > +/* this swap is not atomic, but there will be a very short time frame where the Since we're under a mutex, I think we might get put to sleep for a not-so-short time frame. > + * old_secondary key won't be available. This should not be a big deal as most I could be misreading the code, but isn't it old_primary that's unavailable during the swap? rcu_replace_pointer overwrites cs->primary, so before the final assign, both slots contain old_secondary? > + * likely both peers are already using the new primary at this point. > + */ > +void ovpn_crypto_key_slots_swap(struct ovpn_crypto_state *cs) > +{ > + const struct ovpn_crypto_key_slot *old_primary, *old_secondary; > + > + mutex_lock(&cs->mutex); > + > + old_secondary = rcu_dereference_protected(cs->secondary, > + lockdep_is_held(&cs->mutex)); > + old_primary = rcu_replace_pointer(cs->primary, old_secondary, > + lockdep_is_held(&cs->mutex)); > + rcu_assign_pointer(cs->secondary, old_primary); > + > + pr_debug("key swapped: %u <-> %u\n", > + old_primary ? old_primary->key_id : 0, > + old_secondary ? old_secondary->key_id : 0); > + > + mutex_unlock(&cs->mutex); > +} [...] > +int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, > + struct sk_buff *skb) > +{ > + const unsigned int tag_size = crypto_aead_authsize(ks->encrypt); > + const unsigned int head_size = ovpn_aead_encap_overhead(ks); > + DECLARE_CRYPTO_WAIT(wait); nit: unused > + struct aead_request *req; > + struct sk_buff *trailer; > + struct scatterlist *sg; > + u8 iv[NONCE_SIZE]; > + int nfrags, ret; > + u32 pktid, op; > + > + /* Sample AEAD header format: > + * 48000001 00000005 7e7046bd 444a7e28 cc6387b1 64a4d6c1 380275a... > + * [ OP32 ] [seq # ] [ auth tag ] [ payload ... ] > + * [4-byte > + * IV head] > + */ > + > + /* check that there's enough headroom in the skb for packet > + * encapsulation, after adding network header and encryption overhead > + */ > + if (unlikely(skb_cow_head(skb, OVPN_HEAD_ROOM + head_size))) > + return -ENOBUFS; > + > + /* get number of skb frags and ensure that packet data is writable */ > + nfrags = skb_cow_data(skb, 0, &trailer); > + if (unlikely(nfrags < 0)) > + return nfrags; > + > + if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2))) > + return -ENOSPC; > + > + ovpn_skb_cb(skb)->ctx = kmalloc(sizeof(*ovpn_skb_cb(skb)->ctx), > + GFP_ATOMIC); > + if (unlikely(!ovpn_skb_cb(skb)->ctx)) > + return -ENOMEM; I think you should clear skb->cb (or at least ->ctx) at the start of ovpn_aead_encrypt. I don't think it will be cleaned up by the previous user, and if we fail before this alloc, we will possibly have bogus values in ->ctx when we get to kfree(ovpn_skb_cb(skb)->ctx) at the end of ovpn_encrypt_post. (Similar comments around cb/ctx freeing and initialization apply to ovpn_aead_decrypt and ovpn_decrypt_post) > + sg = ovpn_skb_cb(skb)->ctx->sg; > + > + /* sg table: > + * 0: op, wire nonce (AD, len=OVPN_OP_SIZE_V2+NONCE_WIRE_SIZE), > + * 1, 2, 3, ..., n: payload, > + * n+1: auth_tag (len=tag_size) > + */ > + sg_init_table(sg, nfrags + 2); > + > + /* build scatterlist to encrypt packet payload */ > + ret = skb_to_sgvec_nomark(skb, sg + 1, 0, skb->len); > + if (unlikely(nfrags != ret)) { > + kfree(sg); This is the only location in this function (and ovpn_encrypt_post) that frees sg. Is that correct? sg points to an array contained within ->ctx, I don't think you want to free that directly. > + return -EINVAL; > + } > + > + /* append auth_tag onto scatterlist */ > + __skb_push(skb, tag_size); > + sg_set_buf(sg + nfrags + 1, skb->data, tag_size); > + > + /* obtain packet ID, which is used both as a first > + * 4 bytes of nonce and last 4 bytes of associated data. > + */ > + ret = ovpn_pktid_xmit_next(&ks->pid_xmit, &pktid); > + if (unlikely(ret < 0)) { > + kfree(ovpn_skb_cb(skb)->ctx); Isn't that going to cause a double-free when we get to the end of ovpn_encrypt_post? Or even UAF when we try to get ks/peer at the start? > + return ret; > + } > + > + /* concat 4 bytes packet id and 8 bytes nonce tail into 12 bytes > + * nonce > + */ > + ovpn_pktid_aead_write(pktid, &ks->nonce_tail_xmit, iv); > + > + /* make space for packet id and push it to the front */ > + __skb_push(skb, NONCE_WIRE_SIZE); > + memcpy(skb->data, iv, NONCE_WIRE_SIZE); > + > + /* add packet op as head of additional data */ > + op = ovpn_opcode_compose(OVPN_DATA_V2, ks->key_id, peer->id); > + __skb_push(skb, OVPN_OP_SIZE_V2); > + BUILD_BUG_ON(sizeof(op) != OVPN_OP_SIZE_V2); > + *((__force __be32 *)skb->data) = htonl(op); > + > + /* AEAD Additional data */ > + sg_set_buf(sg, skb->data, OVPN_OP_SIZE_V2 + NONCE_WIRE_SIZE); > + > + req = aead_request_alloc(ks->encrypt, GFP_ATOMIC); > + if (unlikely(!req)) { > + kfree(ovpn_skb_cb(skb)->ctx); Same here. > + return -ENOMEM; > + } > + > + /* setup async crypto operation */ > + aead_request_set_tfm(req, ks->encrypt); > + aead_request_set_callback(req, 0, ovpn_aead_encrypt_done, skb); > + aead_request_set_crypt(req, sg, sg, skb->len - head_size, iv); > + aead_request_set_ad(req, OVPN_OP_SIZE_V2 + NONCE_WIRE_SIZE); > + > + ovpn_skb_cb(skb)->ctx->peer = peer; > + ovpn_skb_cb(skb)->ctx->req = req; > + ovpn_skb_cb(skb)->ctx->ks = ks; > + > + /* encrypt it */ > + return crypto_aead_encrypt(req); > +} > + > +static void ovpn_aead_decrypt_done(void *data, int ret) > +{ > + struct sk_buff *skb = data; > + > + aead_request_free(ovpn_skb_cb(skb)->ctx->req); This function only gets called in the async case. Where's the corresponding aead_request_free in the sync case? (same for encrypt) This should be moved into ovpn_decrypt_post, I think. > + ovpn_decrypt_post(skb, ret); > +} > + > +int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, > + struct sk_buff *skb) > +{ > + const unsigned int tag_size = crypto_aead_authsize(ks->decrypt); > + int ret, payload_len, nfrags; > + unsigned int payload_offset; > + DECLARE_CRYPTO_WAIT(wait); nit: unused [...] > -static void ovpn_encrypt_post(struct sk_buff *skb, int ret) > +void ovpn_encrypt_post(struct sk_buff *skb, int ret) > { > - struct ovpn_peer *peer = ovpn_skb_cb(skb)->peer; > + struct ovpn_crypto_key_slot *ks = ovpn_skb_cb(skb)->ctx->ks; > + struct ovpn_peer *peer = ovpn_skb_cb(skb)->ctx->peer; ovpn_skb_cb(skb)->ctx may not have been set by ovpn_aead_encrypt. > + > + /* encryption is happening asynchronously. This function will be > + * called later by the crypto callback with a proper return value > + */ > + if (unlikely(ret == -EINPROGRESS)) > + return; > > if (unlikely(ret < 0)) > goto err; > > skb_mark_not_on_list(skb); > > + kfree(ovpn_skb_cb(skb)->ctx); > + > switch (peer->sock->sock->sk->sk_protocol) { > case IPPROTO_UDP: > ovpn_udp_send_skb(peer->ovpn, peer, skb); > break; > default: > /* no transport configured yet */ > goto err; ovpn_skb_cb(skb)->ctx has just been freed before this switch, and here we jump to err and free it again. > } > /* skb passed down the stack - don't free it */ > skb = NULL; > err: > if (unlikely(skb)) { > dev_core_stats_tx_dropped_inc(peer->ovpn->dev); > - kfree_skb(skb); > + kfree(ovpn_skb_cb(skb)->ctx); > } > + kfree_skb(skb); > + ovpn_crypto_key_slot_put(ks); > ovpn_peer_put(peer); > }
Hi, On 02/09/2024 16:42, Sabrina Dubroca wrote: > 2024-08-27, 14:07:52 +0200, Antonio Quartulli wrote: >> +/* this swap is not atomic, but there will be a very short time frame where the > > Since we're under a mutex, I think we might get put to sleep for a > not-so-short time frame. > >> + * old_secondary key won't be available. This should not be a big deal as most > > I could be misreading the code, but isn't it old_primary that's > unavailable during the swap? rcu_replace_pointer overwrites > cs->primary, so before the final assign, both slots contain > old_secondary? Right. The comment is not correct. cs->secondary (old_secondary, that is the newest key) is what is probably being used by the other peer for sending traffic. Therefore old_secondary is what is likely to be needed. However, this is pure speculation and may not be accurate. The fact that we could sleep before having completed the swap sounds like something we want to avoid. Maybe I should convert this mutex to a spinlock. Its usage is fairly contained anyway. > >> + * likely both peers are already using the new primary at this point. >> + */ >> +void ovpn_crypto_key_slots_swap(struct ovpn_crypto_state *cs) >> +{ >> + const struct ovpn_crypto_key_slot *old_primary, *old_secondary; >> + >> + mutex_lock(&cs->mutex); >> + >> + old_secondary = rcu_dereference_protected(cs->secondary, >> + lockdep_is_held(&cs->mutex)); >> + old_primary = rcu_replace_pointer(cs->primary, old_secondary, >> + lockdep_is_held(&cs->mutex)); >> + rcu_assign_pointer(cs->secondary, old_primary); >> + >> + pr_debug("key swapped: %u <-> %u\n", >> + old_primary ? old_primary->key_id : 0, >> + old_secondary ? old_secondary->key_id : 0); >> + >> + mutex_unlock(&cs->mutex); >> +} > > [...] >> +int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, >> + struct sk_buff *skb) >> +{ >> + const unsigned int tag_size = crypto_aead_authsize(ks->encrypt); >> + const unsigned int head_size = ovpn_aead_encap_overhead(ks); >> + DECLARE_CRYPTO_WAIT(wait); > > nit: unused ACK > >> + struct aead_request *req; >> + struct sk_buff *trailer; >> + struct scatterlist *sg; >> + u8 iv[NONCE_SIZE]; >> + int nfrags, ret; >> + u32 pktid, op; >> + >> + /* Sample AEAD header format: >> + * 48000001 00000005 7e7046bd 444a7e28 cc6387b1 64a4d6c1 380275a... >> + * [ OP32 ] [seq # ] [ auth tag ] [ payload ... ] >> + * [4-byte >> + * IV head] >> + */ >> + >> + /* check that there's enough headroom in the skb for packet >> + * encapsulation, after adding network header and encryption overhead >> + */ >> + if (unlikely(skb_cow_head(skb, OVPN_HEAD_ROOM + head_size))) >> + return -ENOBUFS; >> + >> + /* get number of skb frags and ensure that packet data is writable */ >> + nfrags = skb_cow_data(skb, 0, &trailer); >> + if (unlikely(nfrags < 0)) >> + return nfrags; >> + >> + if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2))) >> + return -ENOSPC; >> + >> + ovpn_skb_cb(skb)->ctx = kmalloc(sizeof(*ovpn_skb_cb(skb)->ctx), >> + GFP_ATOMIC); >> + if (unlikely(!ovpn_skb_cb(skb)->ctx)) >> + return -ENOMEM; > > I think you should clear skb->cb (or at least ->ctx) at the start of > ovpn_aead_encrypt. I don't think it will be cleaned up by the previous > user, and if we fail before this alloc, we will possibly have bogus > values in ->ctx when we get to kfree(ovpn_skb_cb(skb)->ctx) at the end > of ovpn_encrypt_post. > > (Similar comments around cb/ctx freeing and initialization apply to > ovpn_aead_decrypt and ovpn_decrypt_post) good point - will clear it in both cases. > >> + sg = ovpn_skb_cb(skb)->ctx->sg; >> + >> + /* sg table: >> + * 0: op, wire nonce (AD, len=OVPN_OP_SIZE_V2+NONCE_WIRE_SIZE), >> + * 1, 2, 3, ..., n: payload, >> + * n+1: auth_tag (len=tag_size) >> + */ >> + sg_init_table(sg, nfrags + 2); >> + >> + /* build scatterlist to encrypt packet payload */ >> + ret = skb_to_sgvec_nomark(skb, sg + 1, 0, skb->len); >> + if (unlikely(nfrags != ret)) { >> + kfree(sg); > > This is the only location in this function (and ovpn_encrypt_post) > that frees sg. Is that correct? sg points to an array contained within > ->ctx, I don't think you want to free that directly. No, it is not correct. As you pointed out 'sg' is an actual array so should not be free'd. This is a leftover of the first version where 'sg' was kmalloc'd. FTR: this restructuring is the result of having tested encryption/decryption with pcrypt: sg, that is passed to the crypto code, was initially allocated on the stack, which was obviously not working for async crypto. The solution was to make it part of the skb CB area, so that it can be carried around until crypto is done. > >> + return -EINVAL; >> + } >> + >> + /* append auth_tag onto scatterlist */ >> + __skb_push(skb, tag_size); >> + sg_set_buf(sg + nfrags + 1, skb->data, tag_size); >> + >> + /* obtain packet ID, which is used both as a first >> + * 4 bytes of nonce and last 4 bytes of associated data. >> + */ >> + ret = ovpn_pktid_xmit_next(&ks->pid_xmit, &pktid); >> + if (unlikely(ret < 0)) { >> + kfree(ovpn_skb_cb(skb)->ctx); > > Isn't that going to cause a double-free when we get to the end of > ovpn_encrypt_post? Or even UAF when we try to get ks/peer at the > start? ouch. you're right. I should better leave it alone and let ovpn_encrypt_post take care of free'ing it at the end. > >> + return ret; >> + } >> + >> + /* concat 4 bytes packet id and 8 bytes nonce tail into 12 bytes >> + * nonce >> + */ >> + ovpn_pktid_aead_write(pktid, &ks->nonce_tail_xmit, iv); >> + >> + /* make space for packet id and push it to the front */ >> + __skb_push(skb, NONCE_WIRE_SIZE); >> + memcpy(skb->data, iv, NONCE_WIRE_SIZE); >> + >> + /* add packet op as head of additional data */ >> + op = ovpn_opcode_compose(OVPN_DATA_V2, ks->key_id, peer->id); >> + __skb_push(skb, OVPN_OP_SIZE_V2); >> + BUILD_BUG_ON(sizeof(op) != OVPN_OP_SIZE_V2); >> + *((__force __be32 *)skb->data) = htonl(op); >> + >> + /* AEAD Additional data */ >> + sg_set_buf(sg, skb->data, OVPN_OP_SIZE_V2 + NONCE_WIRE_SIZE); >> + >> + req = aead_request_alloc(ks->encrypt, GFP_ATOMIC); >> + if (unlikely(!req)) { >> + kfree(ovpn_skb_cb(skb)->ctx); > > Same here. yap > >> + return -ENOMEM; >> + } >> + >> + /* setup async crypto operation */ >> + aead_request_set_tfm(req, ks->encrypt); >> + aead_request_set_callback(req, 0, ovpn_aead_encrypt_done, skb); >> + aead_request_set_crypt(req, sg, sg, skb->len - head_size, iv); >> + aead_request_set_ad(req, OVPN_OP_SIZE_V2 + NONCE_WIRE_SIZE); >> + >> + ovpn_skb_cb(skb)->ctx->peer = peer; >> + ovpn_skb_cb(skb)->ctx->req = req; >> + ovpn_skb_cb(skb)->ctx->ks = ks; >> + >> + /* encrypt it */ >> + return crypto_aead_encrypt(req); >> +} >> + >> +static void ovpn_aead_decrypt_done(void *data, int ret) >> +{ >> + struct sk_buff *skb = data; >> + >> + aead_request_free(ovpn_skb_cb(skb)->ctx->req); > > This function only gets called in the async case. Where's the > corresponding aead_request_free in the sync case? (same for encrypt) > This should be moved into ovpn_decrypt_post, I think. I agree, although I am pretty sure I moved it here in the past for a reason. Maybe code changes eliminated that reason. anyway, thanks for spotting this. I'll definitely move it to the _post functions. > >> + ovpn_decrypt_post(skb, ret); >> +} >> + >> +int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, >> + struct sk_buff *skb) >> +{ >> + const unsigned int tag_size = crypto_aead_authsize(ks->decrypt); >> + int ret, payload_len, nfrags; >> + unsigned int payload_offset; >> + DECLARE_CRYPTO_WAIT(wait); > > nit: unused ACK > > > [...] >> -static void ovpn_encrypt_post(struct sk_buff *skb, int ret) >> +void ovpn_encrypt_post(struct sk_buff *skb, int ret) >> { >> - struct ovpn_peer *peer = ovpn_skb_cb(skb)->peer; >> + struct ovpn_crypto_key_slot *ks = ovpn_skb_cb(skb)->ctx->ks; >> + struct ovpn_peer *peer = ovpn_skb_cb(skb)->ctx->peer; > > ovpn_skb_cb(skb)->ctx may not have been set by ovpn_aead_encrypt. right. we may have hit a return before kmalloc'ing it. > >> + >> + /* encryption is happening asynchronously. This function will be >> + * called later by the crypto callback with a proper return value >> + */ >> + if (unlikely(ret == -EINPROGRESS)) >> + return; >> >> if (unlikely(ret < 0)) >> goto err; >> >> skb_mark_not_on_list(skb); >> >> + kfree(ovpn_skb_cb(skb)->ctx); >> + >> switch (peer->sock->sock->sk->sk_protocol) { >> case IPPROTO_UDP: >> ovpn_udp_send_skb(peer->ovpn, peer, skb); >> break; >> default: >> /* no transport configured yet */ >> goto err; > > ovpn_skb_cb(skb)->ctx has just been freed before this switch, and here > we jump to err and free it again. Thanks. Will reworka bit the error path here. > >> } >> /* skb passed down the stack - don't free it */ >> skb = NULL; >> err: >> if (unlikely(skb)) { >> dev_core_stats_tx_dropped_inc(peer->ovpn->dev); >> - kfree_skb(skb); >> + kfree(ovpn_skb_cb(skb)->ctx); >> } >> + kfree_skb(skb); >> + ovpn_crypto_key_slot_put(ks); >> ovpn_peer_put(peer); >> } > This patch was basically re-written after realizing that the async crypto path was not working as expected, therefore sorry if there were some "kinda obvious" mistakes. Thanks a lot for your review.
2024-09-04, 14:07:23 +0200, Antonio Quartulli wrote: > Hi, > > On 02/09/2024 16:42, Sabrina Dubroca wrote: > > 2024-08-27, 14:07:52 +0200, Antonio Quartulli wrote: > > > +/* this swap is not atomic, but there will be a very short time frame where the > > > > Since we're under a mutex, I think we might get put to sleep for a > > not-so-short time frame. > > > > > + * old_secondary key won't be available. This should not be a big deal as most > > > > I could be misreading the code, but isn't it old_primary that's > > unavailable during the swap? rcu_replace_pointer overwrites > > cs->primary, so before the final assign, both slots contain > > old_secondary? > > Right. The comment is not correct. > > cs->secondary (old_secondary, that is the newest key) is what is probably > being used by the other peer for sending traffic. Right, thanks. I was getting confused about the key slots and which key was the newest. If the peer has already started sending with the newest key, no problem. If we're swapping keys before our peer (or we're on a slow network and the peer's packets get delayed), we'll still be receiving packets encrypted with the old key. > Therefore old_secondary is what is likely to be needed. > > However, this is pure speculation and may not be accurate. I can think of a few possibilities if this causes too many unwanted drops: - use a linked list of keys, set the primary instead of swapping, and let delete remove the unused key(s) by ID instead of slot - decouple the TX and RX keys, which also means you don't really need to swap keys (swap for the TX key becomes "set primary", swap on RX can become a noop since you check both slots for the correct keyid) -- and here too delete becomes based on key ID - if cs->mutex becomes a spinlock, take it in the datapath when looking up keys. this will make sure we get a consistent view of the keys state. - come up with a scheme to let the datapath retry the key lookup if it didn't find the key it wanted (maybe something like a seqcount, or maybe taking the lock and retrying if the lookup failed) I don't know if options 1 and 2 are possible based on how openvpn (the protocol and the userspace application) models keys, but they seem a bit "cleaner" on the datapath side (no locking, no retry). But they require a different API. Do you have the same problem in the current userspace implementation? > The fact that we could sleep before having completed the swap sounds like > something we want to avoid. > Maybe I should convert this mutex to a spinlock. Its usage is fairly > contained anyway. I think it would make sense. It's only being held for very short periods, just to set/swap a few pointers. > FTR: this restructuring is the result of having tested encryption/decryption > with pcrypt: sg, that is passed to the crypto code, was initially allocated > on the stack, which was obviously not working for async crypto. > The solution was to make it part of the skb CB area, so that it can be > carried around until crypto is done. I see. I thought this patch looked less familiar than the others :) An alternative to using the CB is what IPsec does: allocate a chunk of memory for all its temporary needs (crypto req, sg, iv, anything else it needs during async crypto) and carve the pointers/small chunks out of it. See esp_alloc_tmp in net/ipv4/esp4.c. (I'm just mentioning that for reference/curiosity, not asking that you change ovpn) > This patch was basically re-written after realizing that the async crypto > path was not working as expected, therefore sorry if there were some "kinda > obvious" mistakes. And I completely missed some of those issues in previous reviews. > Thanks a lot for your review. Cheers, and thanks for your patience.
On 04/09/2024 17:01, Sabrina Dubroca wrote: > 2024-09-04, 14:07:23 +0200, Antonio Quartulli wrote: >> Hi, >> >> On 02/09/2024 16:42, Sabrina Dubroca wrote: >>> 2024-08-27, 14:07:52 +0200, Antonio Quartulli wrote: >>>> +/* this swap is not atomic, but there will be a very short time frame where the >>> >>> Since we're under a mutex, I think we might get put to sleep for a >>> not-so-short time frame. >>> >>>> + * old_secondary key won't be available. This should not be a big deal as most >>> >>> I could be misreading the code, but isn't it old_primary that's >>> unavailable during the swap? rcu_replace_pointer overwrites >>> cs->primary, so before the final assign, both slots contain >>> old_secondary? >> >> Right. The comment is not correct. >> >> cs->secondary (old_secondary, that is the newest key) is what is probably >> being used by the other peer for sending traffic. > > Right, thanks. I was getting confused about the key slots and which > key was the newest. > > If the peer has already started sending with the newest key, no > problem. If we're swapping keys before our peer (or we're on a slow > network and the peer's packets get delayed), we'll still be receiving > packets encrypted with the old key. Right, it's mostly impossible to make the swap happen at the same time on both ends. > > >> Therefore old_secondary is what is likely to be needed. >> >> However, this is pure speculation and may not be accurate. > > I can think of a few possibilities if this causes too many unwanted > drops: > > - use a linked list of keys, set the primary instead of swapping, and > let delete remove the unused key(s) by ID instead of slot > > - decouple the TX and RX keys, which also means you don't really need > to swap keys (swap for the TX key becomes "set primary", swap on RX > can become a noop since you check both slots for the correct keyid) > -- and here too delete becomes based on key ID > > - if cs->mutex becomes a spinlock, take it in the datapath when > looking up keys. this will make sure we get a consistent view of > the keys state. > > - come up with a scheme to let the datapath retry the key lookup if > it didn't find the key it wanted (maybe something like a seqcount, > or maybe taking the lock and retrying if the lookup failed) > > I don't know if options 1 and 2 are possible based on how openvpn (the > protocol and the userspace application) models keys, but they seem a > bit "cleaner" on the datapath side (no locking, no retry). But they > require a different API. After chewing over all these ideas I think we can summarize the requirements as follows: * PRIMARY and SECONDARY in the API is just an abstraction for "KEY FOR TX" and "THE OTHER KEY"; * SWAP means "mark for TX the key that currently was not marked"; * we only need a pointer to the key for TX; * key for RX is picked up by key ID; Therefore, how about having an array large enough to store all key IDs (max ID is 7): * array index is the key ID (fast lookup on RX); * upon SET_KEY we store the provided keys and erase the rest; * upon SET_KEY we store in a new field the index/ID of the PRIMARY (fast lookup on TX), namely primary_id; * upon SWAP we just save in primary_id the "other" index/ID; * at any given time we will have only two keys in the array. It's pretty much like your option 1 and 2, but using an array indexed by key ID. The concept of slot is a bit lost, but it is not important as long as we can keep the API and its semantics the same. Opinions? > > Do you have the same problem in the current userspace implementation? userspace is single-threaded :-P nothing happens while we are busy swapping. > > >> The fact that we could sleep before having completed the swap sounds like >> something we want to avoid. >> Maybe I should convert this mutex to a spinlock. Its usage is fairly >> contained anyway. > > I think it would make sense. It's only being held for very short > periods, just to set/swap a few pointers. ACK > > >> FTR: this restructuring is the result of having tested encryption/decryption >> with pcrypt: sg, that is passed to the crypto code, was initially allocated >> on the stack, which was obviously not working for async crypto. >> The solution was to make it part of the skb CB area, so that it can be >> carried around until crypto is done. > > I see. I thought this patch looked less familiar than the others :) > > An alternative to using the CB is what IPsec does: allocate a chunk of > memory for all its temporary needs (crypto req, sg, iv, anything else > it needs during async crypto) and carve the pointers/small chunks out > of it. See esp_alloc_tmp in net/ipv4/esp4.c. (I'm just mentioning > that for reference/curiosity, not asking that you change ovpn) > > >> This patch was basically re-written after realizing that the async crypto >> path was not working as expected, therefore sorry if there were some "kinda >> obvious" mistakes. > > And I completely missed some of those issues in previous reviews. > >> Thanks a lot for your review. > > Cheers, and thanks for your patience. > Cheers!
On Tue, Aug 27, 2024 at 02:07:52PM +0200, Antonio Quartulli wrote: > This change implements encryption/decryption and > encapsulation/decapsulation of OpenVPN packets. > > Support for generic crypto state is added along with > a wrapper for the AEAD crypto kernel API. > > Signed-off-by: Antonio Quartulli <antonio@openvpn.net> ... > diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c ... > @@ -54,39 +56,122 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb) > dev_sw_netstats_rx_add(peer->ovpn->dev, skb->len); > } > > -static void ovpn_decrypt_post(struct sk_buff *skb, int ret) > +void ovpn_decrypt_post(struct sk_buff *skb, int ret) > { > - struct ovpn_peer *peer = ovpn_skb_cb(skb)->peer; > + struct ovpn_crypto_key_slot *ks = ovpn_skb_cb(skb)->ctx->ks; > + struct ovpn_peer *peer = ovpn_skb_cb(skb)->ctx->peer; > + __be16 proto; > + __be32 *pid; > > - if (unlikely(ret < 0)) > + /* crypto is happening asyncronously. this function will be called nit: asynchronously Flagged by checkpatch.pl --codespell > + * again later by the crypto callback with a proper return code > + */ > + if (unlikely(ret == -EINPROGRESS)) > + return; ...
On 06/09/2024 21:29, Simon Horman wrote: > On Tue, Aug 27, 2024 at 02:07:52PM +0200, Antonio Quartulli wrote: >> This change implements encryption/decryption and >> encapsulation/decapsulation of OpenVPN packets. >> >> Support for generic crypto state is added along with >> a wrapper for the AEAD crypto kernel API. >> >> Signed-off-by: Antonio Quartulli <antonio@openvpn.net> > > ... > >> diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c > > ... > >> @@ -54,39 +56,122 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb) >> dev_sw_netstats_rx_add(peer->ovpn->dev, skb->len); >> } >> >> -static void ovpn_decrypt_post(struct sk_buff *skb, int ret) >> +void ovpn_decrypt_post(struct sk_buff *skb, int ret) >> { >> - struct ovpn_peer *peer = ovpn_skb_cb(skb)->peer; >> + struct ovpn_crypto_key_slot *ks = ovpn_skb_cb(skb)->ctx->ks; >> + struct ovpn_peer *peer = ovpn_skb_cb(skb)->ctx->peer; >> + __be16 proto; >> + __be32 *pid; >> >> - if (unlikely(ret < 0)) >> + /* crypto is happening asyncronously. this function will be called > > nit: asynchronously > > Flagged by checkpatch.pl --codespell Thanks! I forgot to add the codespell flag to my last checkpatch run :-( Will fix it in the next version. Cheers, > >> + * again later by the crypto callback with a proper return code >> + */ >> + if (unlikely(ret == -EINPROGRESS)) >> + return; > > ...
2024-09-06, 15:19:08 +0200, Antonio Quartulli wrote: > On 04/09/2024 17:01, Sabrina Dubroca wrote: > > 2024-09-04, 14:07:23 +0200, Antonio Quartulli wrote: > > > On 02/09/2024 16:42, Sabrina Dubroca wrote: > > > > 2024-08-27, 14:07:52 +0200, Antonio Quartulli wrote: > > I can think of a few possibilities if this causes too many unwanted > > drops: > > > > - use a linked list of keys, set the primary instead of swapping, and > > let delete remove the unused key(s) by ID instead of slot > > > > - decouple the TX and RX keys, which also means you don't really need > > to swap keys (swap for the TX key becomes "set primary", swap on RX > > can become a noop since you check both slots for the correct keyid) > > -- and here too delete becomes based on key ID > > > > - if cs->mutex becomes a spinlock, take it in the datapath when > > looking up keys. this will make sure we get a consistent view of > > the keys state. > > > > - come up with a scheme to let the datapath retry the key lookup if > > it didn't find the key it wanted (maybe something like a seqcount, > > or maybe taking the lock and retrying if the lookup failed) > > > > I don't know if options 1 and 2 are possible based on how openvpn (the > > protocol and the userspace application) models keys, but they seem a > > bit "cleaner" on the datapath side (no locking, no retry). But they > > require a different API. > > After chewing over all these ideas I think we can summarize the requirements > as follows: > > * PRIMARY and SECONDARY in the API is just an abstraction for "KEY FOR TX" > and "THE OTHER KEY"; > * SWAP means "mark for TX the key that currently was not marked"; > * we only need a pointer to the key for TX; > * key for RX is picked up by key ID; > > Therefore, how about having an array large enough to store all key IDs (max > ID is 7): Right, I forgot about that. For 8 keys, sure, array sounds ok. MACsec has only 4 and I also implemented it as an array (include/net/macsec.h: macsec_{tx,rx}_sc, sa array). > * array index is the key ID (fast lookup on RX); > * upon SET_KEY we store the provided keys and erase the rest; I'm not sure about erasing the rest. If you're installing the secondary ("next primary") key, you can't just erase the current primary? ("erase the rest" also means you'd only ever have one key in the array, which contradicts your last item in this list) > * upon SET_KEY we store in a new field the index/ID of the PRIMARY (fast > lookup on TX), namely primary_id; > * upon SWAP we just save in primary_id the "other" index/ID; I'm confused by these two. Both SET_KEY and SWAP modify the primary_id? > * at any given time we will have only two keys in the array. This needs to be enforced in the SET_KEY implementation, otherwise SWAP will have inconsistent effects. > It's pretty much like your option 1 and 2, but using an array > indexed by key ID. Are you decoupling TX and RX keys (so that they can be installed independently) in this proposal? I can't really tell, the array could be key pairs or separate. > The concept of slot is a bit lost, but it is not important as long as we can > keep the API and its semantics the same. Would it be a problem for userspace to keep track of key IDs, and use that (instead of slots) to communicate with the kernel? Make setkey/delkey be based on keyid, and replace swap with a set_tx operation which updates the current keyid for TX. That would seem like a better API, especially for the per-ID array you're proposing here. The concept of slots would be almost completely lost (only "current tx key" is left, which is similar to the "primary slot"). (it becomes almost identical to the way MACsec does things, except possibly that in MACsec the TX and RX keys are not paired at all, so you can install/delete a TX key without touching any RX key) Maybe you can also rework the current code to look a bit like this array-based proposal, and not give up the notion of slots, if that's something strongly built into the core of openvpn: - primary/secondary in ovpn_crypto_state become slots[2] - add a one-bit "primary" reference, used to look into the slots array - swap just flips that "primary" bit - TX uses slots[primary] - RX keeps inspecting both slots (now slot[0] and slot[1] instead of primary/secondary) looking for the correct keyid - setkey/delkey still operate on primary/secondary, but need to figure out whether slot[0] or slot[1] is primary/secondary based on ->primary It's almost identical to the current code (and API), except you don't need to reassign any pointers to swap keys, so it should avoid the rekey issue.
On 10/09/2024 15:04, Sabrina Dubroca wrote: > 2024-09-06, 15:19:08 +0200, Antonio Quartulli wrote: >> On 04/09/2024 17:01, Sabrina Dubroca wrote: >>> 2024-09-04, 14:07:23 +0200, Antonio Quartulli wrote: >>>> On 02/09/2024 16:42, Sabrina Dubroca wrote: >>>>> 2024-08-27, 14:07:52 +0200, Antonio Quartulli wrote: >>> I can think of a few possibilities if this causes too many unwanted >>> drops: >>> >>> - use a linked list of keys, set the primary instead of swapping, and >>> let delete remove the unused key(s) by ID instead of slot >>> >>> - decouple the TX and RX keys, which also means you don't really need >>> to swap keys (swap for the TX key becomes "set primary", swap on RX >>> can become a noop since you check both slots for the correct keyid) >>> -- and here too delete becomes based on key ID >>> >>> - if cs->mutex becomes a spinlock, take it in the datapath when >>> looking up keys. this will make sure we get a consistent view of >>> the keys state. >>> >>> - come up with a scheme to let the datapath retry the key lookup if >>> it didn't find the key it wanted (maybe something like a seqcount, >>> or maybe taking the lock and retrying if the lookup failed) >>> >>> I don't know if options 1 and 2 are possible based on how openvpn (the >>> protocol and the userspace application) models keys, but they seem a >>> bit "cleaner" on the datapath side (no locking, no retry). But they >>> require a different API. >> >> After chewing over all these ideas I think we can summarize the requirements >> as follows: >> >> * PRIMARY and SECONDARY in the API is just an abstraction for "KEY FOR TX" >> and "THE OTHER KEY"; >> * SWAP means "mark for TX the key that currently was not marked"; >> * we only need a pointer to the key for TX; >> * key for RX is picked up by key ID; >> >> Therefore, how about having an array large enough to store all key IDs (max >> ID is 7): > > Right, I forgot about that. For 8 keys, sure, array sounds ok. MACsec > has only 4 and I also implemented it as an array > (include/net/macsec.h: macsec_{tx,rx}_sc, sa array). thanks for the pointer! > >> * array index is the key ID (fast lookup on RX); >> * upon SET_KEY we store the provided keys and erase the rest; > > I'm not sure about erasing the rest. If you're installing the > secondary ("next primary") key, you can't just erase the current > primary? ("erase the rest" also means you'd only ever have one key in > the array, which contradicts your last item in this list) Yeah, my point is wrong. I fooled myself assuming SET_KEY would install two keys each time, but this is not the case, so we can't "erase everything else". What we can do is: * if SET_KEY wants to install a key in PRIMARY, then we erase the key marked as "primary" and we mark the new one as such * if SET_KEY wants to install a key in SECONDARY, then we erase the key without any mark. This way we simulate writing into slots. > >> * upon SET_KEY we store in a new field the index/ID of the PRIMARY (fast >> lookup on TX), namely primary_id; FTR: this is what "marking as primary" means >> * upon SWAP we just save in primary_id the "other" index/ID; > > I'm confused by these two. Both SET_KEY and SWAP modify the primary_id? Yes. SET_KEY may want to install a key in the primary slot: in that case the new key has to be marked as primary immediately. This normally happens only upon first SET_KEY after connection, when no other key exists. SWAP will switch primary_id to the other key ID. makes sense? > >> * at any given time we will have only two keys in the array. > > This needs to be enforced in the SET_KEY implementation, otherwise > SWAP will have inconsistent effects. yap. I hope what I wrote above about SET_KEY helps clarifying this part. > > >> It's pretty much like your option 1 and 2, but using an array >> indexed by key ID. > > Are you decoupling TX and RX keys (so that they can be installed > independently) in this proposal? I can't really tell, the array could > be key pairs or separate. I would not decouple TX and RX keys. Each array item should be the same as what we are now storing into each slot (struct ovpn_crypto_key_slot *). I could use two arrays, instead of an array of slots, but that means changing way more logic and I don't see the benefit. > > >> The concept of slot is a bit lost, but it is not important as long as we can >> keep the API and its semantics the same. > > Would it be a problem for userspace to keep track of key IDs, and use > that (instead of slots) to communicate with the kernel? Make > setkey/delkey be based on keyid, and replace swap with a set_tx > operation which updates the current keyid for TX. That would seem like > a better API, especially for the per-ID array you're proposing > here. The concept of slots would be almost completely lost (only > "current tx key" is left, which is similar to the "primary slot"). > (it becomes almost identical to the way MACsec does things, except > possibly that in MACsec the TX and RX keys are not paired at all, so > you can install/delete a TX key without touching any RX key) > unfortunately changing userspace that way is not currently viable. There are more components (and documentation) that attach to this slot abstraction. Hence my proposal to keep the API the same, but rework the internal for better implementation. > > Maybe you can also rework the current code to look a bit like this > array-based proposal, and not give up the notion of slots, if that's > something strongly built into the core of openvpn: > > - primary/secondary in ovpn_crypto_state become slots[2] > - add a one-bit "primary" reference, used to look into the slots array > - swap just flips that "primary" bit > - TX uses slots[primary] > - RX keeps inspecting both slots (now slot[0] and slot[1] instead of > primary/secondary) looking for the correct keyid > - setkey/delkey still operate on primary/secondary, but need to figure > out whether slot[0] or slot[1] is primary/secondary based on > ->primary > > It's almost identical to the current code (and API), except you don't > need to reassign any pointers to swap keys, so it should avoid the > rekey issue. This approach sounds very close to what I was aiming for, but simpler because we have slots[2] instead of slots[8] (which means that primary_id can be just 'one-bit'). The only downside is that upon RX we have to check both keys. However I don't think this is an issue as we have just 2 keys at most. I could even optimize the lookup by starting always from the primary key, as that's the one being used most of the time by the sender. Ok, I will go with this approach you summarized here at the end. Thanks a lot! Cheers, >
2024-09-11, 14:52:10 +0200, Antonio Quartulli wrote: > On 10/09/2024 15:04, Sabrina Dubroca wrote: > > 2024-09-06, 15:19:08 +0200, Antonio Quartulli wrote: > > > Therefore, how about having an array large enough to store all key IDs (max > > > ID is 7): > > > > Right, I forgot about that. For 8 keys, sure, array sounds ok. MACsec > > has only 4 and I also implemented it as an array > > (include/net/macsec.h: macsec_{tx,rx}_sc, sa array). > > thanks for the pointer! > > > > > > * array index is the key ID (fast lookup on RX); > > > * upon SET_KEY we store the provided keys and erase the rest; > > > > I'm not sure about erasing the rest. If you're installing the > > secondary ("next primary") key, you can't just erase the current > > primary? ("erase the rest" also means you'd only ever have one key in > > the array, which contradicts your last item in this list) > Yeah, my point is wrong. > I fooled myself assuming SET_KEY would install two keys each time, but this > is not the case, so we can't "erase everything else". > > What we can do is: > * if SET_KEY wants to install a key in PRIMARY, then we erase the key marked > as "primary" and we mark the new one as such > * if SET_KEY wants to install a key in SECONDARY, then we erase the key > without any mark. Ok, then the DEL_KEY op would not really be needed. > > This way we simulate writing into slots. > > > > > > * upon SET_KEY we store in a new field the index/ID of the PRIMARY (fast > > > lookup on TX), namely primary_id; > > FTR: this is what "marking as primary" means > > > > * upon SWAP we just save in primary_id the "other" index/ID; > > > > I'm confused by these two. Both SET_KEY and SWAP modify the primary_id? > > Yes. > SET_KEY may want to install a key in the primary slot: in that case the new > key has to be marked as primary immediately. > This normally happens only upon first SET_KEY after connection, when no > other key exists. > > SWAP will switch primary_id to the other key ID. > > makes sense? Yes, thanks, that fixes up all my confusion around setting primary_id. > > > * at any given time we will have only two keys in the array. > > > > This needs to be enforced in the SET_KEY implementation, otherwise > > SWAP will have inconsistent effects. > > yap. I hope what I wrote above about SET_KEY helps clarifying this part. > > > > > > > > It's pretty much like your option 1 and 2, but using an array > > > indexed by key ID. > > > > Are you decoupling TX and RX keys (so that they can be installed > > independently) in this proposal? I can't really tell, the array could > > be key pairs or separate. > > I would not decouple TX and RX keys. > Each array item should be the same as what we are now storing into each slot > (struct ovpn_crypto_key_slot *). > I could use two arrays, instead of an array of slots, but that means > changing way more logic and I don't see the benefit. Avoid changing both keys every time when a link has very asymmetric traffic. And the concept of a primary/secondary key makes no sense on receive, all you need is keyids, and there's no need to swap slots, which avoids the "key not available during SWAP" issue entirely. Primary key only makes sense on TX. But I'm guessing the keypair concept is also baked deep into openvpn. > > > The concept of slot is a bit lost, but it is not important as long as we can > > > keep the API and its semantics the same. > > > > Would it be a problem for userspace to keep track of key IDs, and use > > that (instead of slots) to communicate with the kernel? Make > > setkey/delkey be based on keyid, and replace swap with a set_tx > > operation which updates the current keyid for TX. That would seem like > > a better API, especially for the per-ID array you're proposing > > here. The concept of slots would be almost completely lost (only > > "current tx key" is left, which is similar to the "primary slot"). > > (it becomes almost identical to the way MACsec does things, except > > possibly that in MACsec the TX and RX keys are not paired at all, so > > you can install/delete a TX key without touching any RX key) > > > > unfortunately changing userspace that way is not currently viable. > There are more components (and documentation) that attach to this slot > abstraction. I thought that might be the case. Too bad, it means we have to keep the slots API and the kernel will be stuck with it forever (even if some day userspace can manage key ids instead of slots). > Hence my proposal to keep the API the same, but rework the internal for > better implementation. Yeah, that makes sense if you have that constraint imposed by the existing userspace. > > > > > Maybe you can also rework the current code to look a bit like this > > array-based proposal, and not give up the notion of slots, if that's > > something strongly built into the core of openvpn: > > > > - primary/secondary in ovpn_crypto_state become slots[2] > > - add a one-bit "primary" reference, used to look into the slots array > > - swap just flips that "primary" bit > > - TX uses slots[primary] > > - RX keeps inspecting both slots (now slot[0] and slot[1] instead of > > primary/secondary) looking for the correct keyid > > - setkey/delkey still operate on primary/secondary, but need to figure > > out whether slot[0] or slot[1] is primary/secondary based on > > ->primary > > > > It's almost identical to the current code (and API), except you don't > > need to reassign any pointers to swap keys, so it should avoid the > > rekey issue. > > This approach sounds very close to what I was aiming for, but simpler > because we have slots[2] instead of slots[8] (which means that primary_id > can be just 'one-bit'). Yes. > The only downside is that upon RX we have to check both keys. > However I don't think this is an issue as we have just 2 keys at most. > > I could even optimize the lookup by starting always from the primary key, as > that's the one being used most of the time by the sender. Nice. > Ok, I will go with this approach you summarized here at the end. Ok. I think it should be a pretty minimal set of changes on top of the existing code, especially if you hide the primary/secondary accesses in inline helpers.
On 11/09/2024 15:30, Sabrina Dubroca wrote: > 2024-09-11, 14:52:10 +0200, Antonio Quartulli wrote: >> On 10/09/2024 15:04, Sabrina Dubroca wrote: >>> 2024-09-06, 15:19:08 +0200, Antonio Quartulli wrote: >>>> Therefore, how about having an array large enough to store all key IDs (max >>>> ID is 7): >>> >>> Right, I forgot about that. For 8 keys, sure, array sounds ok. MACsec >>> has only 4 and I also implemented it as an array >>> (include/net/macsec.h: macsec_{tx,rx}_sc, sa array). >> >> thanks for the pointer! >> >>> >>>> * array index is the key ID (fast lookup on RX); >>>> * upon SET_KEY we store the provided keys and erase the rest; >>> >>> I'm not sure about erasing the rest. If you're installing the >>> secondary ("next primary") key, you can't just erase the current >>> primary? ("erase the rest" also means you'd only ever have one key in >>> the array, which contradicts your last item in this list) >> Yeah, my point is wrong. >> I fooled myself assuming SET_KEY would install two keys each time, but this >> is not the case, so we can't "erase everything else". >> >> What we can do is: >> * if SET_KEY wants to install a key in PRIMARY, then we erase the key marked >> as "primary" and we mark the new one as such >> * if SET_KEY wants to install a key in SECONDARY, then we erase the key >> without any mark. > > Ok, then the DEL_KEY op would not really be needed. It would be needed only when we want to kick an old key without installing a new one. IMHO something that should not truly happen during the normal lifecycle of a peer, but there might be some corner cases where userspace may still want to do that (I am checking userspace to understand when this can truly happen, if at all) > >> >> This way we simulate writing into slots. >> >>> >>>> * upon SET_KEY we store in a new field the index/ID of the PRIMARY (fast >>>> lookup on TX), namely primary_id; >> >> FTR: this is what "marking as primary" means >> >>>> * upon SWAP we just save in primary_id the "other" index/ID; >>> >>> I'm confused by these two. Both SET_KEY and SWAP modify the primary_id? >> >> Yes. >> SET_KEY may want to install a key in the primary slot: in that case the new >> key has to be marked as primary immediately. >> This normally happens only upon first SET_KEY after connection, when no >> other key exists. >> >> SWAP will switch primary_id to the other key ID. >> >> makes sense? > > Yes, thanks, that fixes up all my confusion around setting primary_id. > >>>> * at any given time we will have only two keys in the array. >>> >>> This needs to be enforced in the SET_KEY implementation, otherwise >>> SWAP will have inconsistent effects. >> >> yap. I hope what I wrote above about SET_KEY helps clarifying this part. >> >>> >>> >>>> It's pretty much like your option 1 and 2, but using an array >>>> indexed by key ID. >>> >>> Are you decoupling TX and RX keys (so that they can be installed >>> independently) in this proposal? I can't really tell, the array could >>> be key pairs or separate. >> >> I would not decouple TX and RX keys. >> Each array item should be the same as what we are now storing into each slot >> (struct ovpn_crypto_key_slot *). >> I could use two arrays, instead of an array of slots, but that means >> changing way more logic and I don't see the benefit. > > Avoid changing both keys every time when a link has very asymmetric > traffic. And the concept of a primary/secondary key makes no sense on > receive, all you need is keyids, and there's no need to swap slots, > which avoids the "key not available during SWAP" issue > entirely. Primary key only makes sense on TX. > > But I'm guessing the keypair concept is also baked deep into openvpn. Correct. Both keys are recomputed upon each renegotiation, even if one key was still usable. > > >>>> The concept of slot is a bit lost, but it is not important as long as we can >>>> keep the API and its semantics the same. >>> >>> Would it be a problem for userspace to keep track of key IDs, and use >>> that (instead of slots) to communicate with the kernel? Make >>> setkey/delkey be based on keyid, and replace swap with a set_tx >>> operation which updates the current keyid for TX. That would seem like >>> a better API, especially for the per-ID array you're proposing >>> here. The concept of slots would be almost completely lost (only >>> "current tx key" is left, which is similar to the "primary slot"). >>> (it becomes almost identical to the way MACsec does things, except >>> possibly that in MACsec the TX and RX keys are not paired at all, so >>> you can install/delete a TX key without touching any RX key) >>> >> >> unfortunately changing userspace that way is not currently viable. >> There are more components (and documentation) that attach to this slot >> abstraction. > > I thought that might be the case. Too bad, it means we have to keep > the slots API and the kernel will be stuck with it forever (even if > some day userspace can manage key ids instead of slots). Yap, but I think there won't be any change like that in the near future. > >> Hence my proposal to keep the API the same, but rework the internal for >> better implementation. > > Yeah, that makes sense if you have that constraint imposed by the > existing userspace. > >> >>> >>> Maybe you can also rework the current code to look a bit like this >>> array-based proposal, and not give up the notion of slots, if that's >>> something strongly built into the core of openvpn: >>> >>> - primary/secondary in ovpn_crypto_state become slots[2] >>> - add a one-bit "primary" reference, used to look into the slots array >>> - swap just flips that "primary" bit >>> - TX uses slots[primary] >>> - RX keeps inspecting both slots (now slot[0] and slot[1] instead of >>> primary/secondary) looking for the correct keyid >>> - setkey/delkey still operate on primary/secondary, but need to figure >>> out whether slot[0] or slot[1] is primary/secondary based on >>> ->primary >>> >>> It's almost identical to the current code (and API), except you don't >>> need to reassign any pointers to swap keys, so it should avoid the >>> rekey issue. >> >> This approach sounds very close to what I was aiming for, but simpler >> because we have slots[2] instead of slots[8] (which means that primary_id >> can be just 'one-bit'). > > Yes. > >> The only downside is that upon RX we have to check both keys. >> However I don't think this is an issue as we have just 2 keys at most. >> >> I could even optimize the lookup by starting always from the primary key, as >> that's the one being used most of the time by the sender. > > Nice. > >> Ok, I will go with this approach you summarized here at the end. > > Ok. I think it should be a pretty minimal set of changes on top of the > existing code, especially if you hide the primary/secondary accesses > in inline helpers. I think so to - working on it. Thanks a lot! Cheers, >
Hello Antonio, Sabrina, let me join the key management design discussion and put my 2 cents in. Please find my comment below. On 12.09.2024 11:33, Antonio Quartulli wrote: > On 11/09/2024 15:30, Sabrina Dubroca wrote: >> 2024-09-11, 14:52:10 +0200, Antonio Quartulli wrote: >>> On 10/09/2024 15:04, Sabrina Dubroca wrote: >>>> 2024-09-06, 15:19:08 +0200, Antonio Quartulli wrote: >>>>> Therefore, how about having an array large enough to store all key >>>>> IDs (max >>>>> ID is 7): >>>> >>>> Right, I forgot about that. For 8 keys, sure, array sounds ok. MACsec >>>> has only 4 and I also implemented it as an array >>>> (include/net/macsec.h: macsec_{tx,rx}_sc, sa array). >>> >>> thanks for the pointer! >>> >>>> >>>>> * array index is the key ID (fast lookup on RX); >>>>> * upon SET_KEY we store the provided keys and erase the rest; >>>> >>>> I'm not sure about erasing the rest. If you're installing the >>>> secondary ("next primary") key, you can't just erase the current >>>> primary? ("erase the rest" also means you'd only ever have one key in >>>> the array, which contradicts your last item in this list) >>> Yeah, my point is wrong. >>> I fooled myself assuming SET_KEY would install two keys each time, >>> but this >>> is not the case, so we can't "erase everything else". >>> >>> What we can do is: >>> * if SET_KEY wants to install a key in PRIMARY, then we erase the key >>> marked >>> as "primary" and we mark the new one as such >>> * if SET_KEY wants to install a key in SECONDARY, then we erase the key >>> without any mark. >> >> Ok, then the DEL_KEY op would not really be needed. > > It would be needed only when we want to kick an old key without > installing a new one. > IMHO something that should not truly happen during the normal lifecycle > of a peer, but there might be some corner cases where userspace may > still want to do that (I am checking userspace to understand when this > can truly happen, if at all) > >> >>> >>> This way we simulate writing into slots. >>> >>>> >>>>> * upon SET_KEY we store in a new field the index/ID of the PRIMARY >>>>> (fast >>>>> lookup on TX), namely primary_id; >>> >>> FTR: this is what "marking as primary" means >>> >>>>> * upon SWAP we just save in primary_id the "other" index/ID; >>>> >>>> I'm confused by these two. Both SET_KEY and SWAP modify the primary_id? >>> >>> Yes. >>> SET_KEY may want to install a key in the primary slot: in that case >>> the new >>> key has to be marked as primary immediately. >>> This normally happens only upon first SET_KEY after connection, when no >>> other key exists. >>> >>> SWAP will switch primary_id to the other key ID. >>> >>> makes sense? >> >> Yes, thanks, that fixes up all my confusion around setting primary_id. >> >>>>> * at any given time we will have only two keys in the array. >>>> >>>> This needs to be enforced in the SET_KEY implementation, otherwise >>>> SWAP will have inconsistent effects. >>> >>> yap. I hope what I wrote above about SET_KEY helps clarifying this part. >>> >>>> >>>> >>>>> It's pretty much like your option 1 and 2, but using an array >>>>> indexed by key ID. >>>> >>>> Are you decoupling TX and RX keys (so that they can be installed >>>> independently) in this proposal? I can't really tell, the array could >>>> be key pairs or separate. >>> >>> I would not decouple TX and RX keys. >>> Each array item should be the same as what we are now storing into >>> each slot >>> (struct ovpn_crypto_key_slot *). >>> I could use two arrays, instead of an array of slots, but that means >>> changing way more logic and I don't see the benefit. >> >> Avoid changing both keys every time when a link has very asymmetric >> traffic. And the concept of a primary/secondary key makes no sense on >> receive, all you need is keyids, and there's no need to swap slots, >> which avoids the "key not available during SWAP" issue >> entirely. Primary key only makes sense on TX. >> >> But I'm guessing the keypair concept is also baked deep into openvpn. > > Correct. Both keys are recomputed upon each renegotiation, even if one > key was still usable. > >> >> >>>>> The concept of slot is a bit lost, but it is not important as long >>>>> as we can >>>>> keep the API and its semantics the same. >>>> >>>> Would it be a problem for userspace to keep track of key IDs, and use >>>> that (instead of slots) to communicate with the kernel? Make >>>> setkey/delkey be based on keyid, and replace swap with a set_tx >>>> operation which updates the current keyid for TX. That would seem like >>>> a better API, especially for the per-ID array you're proposing >>>> here. The concept of slots would be almost completely lost (only >>>> "current tx key" is left, which is similar to the "primary slot"). >>>> (it becomes almost identical to the way MACsec does things, except >>>> possibly that in MACsec the TX and RX keys are not paired at all, so >>>> you can install/delete a TX key without touching any RX key) >>>> >>> >>> unfortunately changing userspace that way is not currently viable. >>> There are more components (and documentation) that attach to this slot >>> abstraction. >> >> I thought that might be the case. Too bad, it means we have to keep >> the slots API and the kernel will be stuck with it forever (even if >> some day userspace can manage key ids instead of slots). > > Yap, but I think there won't be any change like that in the near future. As far as I understand the conclusion of the discussion, the best practice for the keys management is to keep keys discrete including the Tx/Rx keys separation. On the other hand, from the OpenVPN protocol perspective, keys are grouped into slots and peers perform so called key slots swapping. As I can see, we have two options to adjust userspace application and kernel to each other: 1) implement "key slots" abstraction in Netlink API to keep kernel interface similar to the current OpenVPN protocol; 2) implement "discrete" keys management via Netlink API and move "slots" abstraction to the userspace application. Sabrina already mentioned that kernel will stuck with the slots abstraction forever. This is due to the rule that user's API should not be changed. Antonio, can we consider implementing the slot abstraction in the userspace application? It will move complexity from the kernel to the userspace, where it can be easy debugged and maintained. And since the keys management is part of the control plane. Keeping abstraction closer to the control plane implementation will allow any protocol modification, while keeping the kernel forward compatible. >>> Hence my proposal to keep the API the same, but rework the internal for >>> better implementation. >> >> Yeah, that makes sense if you have that constraint imposed by the >> existing userspace. >> >>> >>>> >>>> Maybe you can also rework the current code to look a bit like this >>>> array-based proposal, and not give up the notion of slots, if that's >>>> something strongly built into the core of openvpn: >>>> >>>> - primary/secondary in ovpn_crypto_state become slots[2] >>>> - add a one-bit "primary" reference, used to look into the slots array >>>> - swap just flips that "primary" bit >>>> - TX uses slots[primary] >>>> - RX keeps inspecting both slots (now slot[0] and slot[1] instead of >>>> primary/secondary) looking for the correct keyid >>>> - setkey/delkey still operate on primary/secondary, but need to figure >>>> out whether slot[0] or slot[1] is primary/secondary based on >>>> ->primary >>>> >>>> It's almost identical to the current code (and API), except you don't >>>> need to reassign any pointers to swap keys, so it should avoid the >>>> rekey issue. >>> >>> This approach sounds very close to what I was aiming for, but simpler >>> because we have slots[2] instead of slots[8] (which means that >>> primary_id >>> can be just 'one-bit'). >> >> Yes. >> >>> The only downside is that upon RX we have to check both keys. >>> However I don't think this is an issue as we have just 2 keys at most. >>> >>> I could even optimize the lookup by starting always from the primary >>> key, as >>> that's the one being used most of the time by the sender. >> >> Nice. >> >>> Ok, I will go with this approach you summarized here at the end. >> >> Ok. I think it should be a pretty minimal set of changes on top of the >> existing code, especially if you hide the primary/secondary accesses >> in inline helpers. > > I think so to - working on it.
Hi, On 22/09/2024 21:51, Sergey Ryazanov wrote: > Hello Antonio, Sabrina, > > let me join the key management design discussion and put my 2 cents in. > Please find my comment below. > > On 12.09.2024 11:33, Antonio Quartulli wrote: >> On 11/09/2024 15:30, Sabrina Dubroca wrote: >>> 2024-09-11, 14:52:10 +0200, Antonio Quartulli wrote: >>>> On 10/09/2024 15:04, Sabrina Dubroca wrote: >>>>> 2024-09-06, 15:19:08 +0200, Antonio Quartulli wrote: >>>>>> Therefore, how about having an array large enough to store all key >>>>>> IDs (max >>>>>> ID is 7): >>>>> >>>>> Right, I forgot about that. For 8 keys, sure, array sounds ok. MACsec >>>>> has only 4 and I also implemented it as an array >>>>> (include/net/macsec.h: macsec_{tx,rx}_sc, sa array). >>>> >>>> thanks for the pointer! >>>> >>>>> >>>>>> * array index is the key ID (fast lookup on RX); >>>>>> * upon SET_KEY we store the provided keys and erase the rest; >>>>> >>>>> I'm not sure about erasing the rest. If you're installing the >>>>> secondary ("next primary") key, you can't just erase the current >>>>> primary? ("erase the rest" also means you'd only ever have one key in >>>>> the array, which contradicts your last item in this list) >>>> Yeah, my point is wrong. >>>> I fooled myself assuming SET_KEY would install two keys each time, >>>> but this >>>> is not the case, so we can't "erase everything else". >>>> >>>> What we can do is: >>>> * if SET_KEY wants to install a key in PRIMARY, then we erase the >>>> key marked >>>> as "primary" and we mark the new one as such >>>> * if SET_KEY wants to install a key in SECONDARY, then we erase the key >>>> without any mark. >>> >>> Ok, then the DEL_KEY op would not really be needed. >> >> It would be needed only when we want to kick an old key without >> installing a new one. >> IMHO something that should not truly happen during the normal >> lifecycle of a peer, but there might be some corner cases where >> userspace may still want to do that (I am checking userspace to >> understand when this can truly happen, if at all) >> >>> >>>> >>>> This way we simulate writing into slots. >>>> >>>>> >>>>>> * upon SET_KEY we store in a new field the index/ID of the PRIMARY >>>>>> (fast >>>>>> lookup on TX), namely primary_id; >>>> >>>> FTR: this is what "marking as primary" means >>>> >>>>>> * upon SWAP we just save in primary_id the "other" index/ID; >>>>> >>>>> I'm confused by these two. Both SET_KEY and SWAP modify the >>>>> primary_id? >>>> >>>> Yes. >>>> SET_KEY may want to install a key in the primary slot: in that case >>>> the new >>>> key has to be marked as primary immediately. >>>> This normally happens only upon first SET_KEY after connection, when no >>>> other key exists. >>>> >>>> SWAP will switch primary_id to the other key ID. >>>> >>>> makes sense? >>> >>> Yes, thanks, that fixes up all my confusion around setting primary_id. >>> >>>>>> * at any given time we will have only two keys in the array. >>>>> >>>>> This needs to be enforced in the SET_KEY implementation, otherwise >>>>> SWAP will have inconsistent effects. >>>> >>>> yap. I hope what I wrote above about SET_KEY helps clarifying this >>>> part. >>>> >>>>> >>>>> >>>>>> It's pretty much like your option 1 and 2, but using an array >>>>>> indexed by key ID. >>>>> >>>>> Are you decoupling TX and RX keys (so that they can be installed >>>>> independently) in this proposal? I can't really tell, the array could >>>>> be key pairs or separate. >>>> >>>> I would not decouple TX and RX keys. >>>> Each array item should be the same as what we are now storing into >>>> each slot >>>> (struct ovpn_crypto_key_slot *). >>>> I could use two arrays, instead of an array of slots, but that means >>>> changing way more logic and I don't see the benefit. >>> >>> Avoid changing both keys every time when a link has very asymmetric >>> traffic. And the concept of a primary/secondary key makes no sense on >>> receive, all you need is keyids, and there's no need to swap slots, >>> which avoids the "key not available during SWAP" issue >>> entirely. Primary key only makes sense on TX. >>> >>> But I'm guessing the keypair concept is also baked deep into openvpn. >> >> Correct. Both keys are recomputed upon each renegotiation, even if one >> key was still usable. >> >>> >>> >>>>>> The concept of slot is a bit lost, but it is not important as long >>>>>> as we can >>>>>> keep the API and its semantics the same. >>>>> >>>>> Would it be a problem for userspace to keep track of key IDs, and use >>>>> that (instead of slots) to communicate with the kernel? Make >>>>> setkey/delkey be based on keyid, and replace swap with a set_tx >>>>> operation which updates the current keyid for TX. That would seem like >>>>> a better API, especially for the per-ID array you're proposing >>>>> here. The concept of slots would be almost completely lost (only >>>>> "current tx key" is left, which is similar to the "primary slot"). >>>>> (it becomes almost identical to the way MACsec does things, except >>>>> possibly that in MACsec the TX and RX keys are not paired at all, so >>>>> you can install/delete a TX key without touching any RX key) >>>>> >>>> >>>> unfortunately changing userspace that way is not currently viable. >>>> There are more components (and documentation) that attach to this slot >>>> abstraction. >>> >>> I thought that might be the case. Too bad, it means we have to keep >>> the slots API and the kernel will be stuck with it forever (even if >>> some day userspace can manage key ids instead of slots). >> >> Yap, but I think there won't be any change like that in the near future. > > As far as I understand the conclusion of the discussion, the best > practice for the keys management is to keep keys discrete including the > Tx/Rx keys separation. On the other hand, from the OpenVPN protocol > perspective, keys are grouped into slots and peers perform so called key > slots swapping. > > As I can see, we have two options to adjust userspace application and > kernel to each other: > 1) implement "key slots" abstraction in Netlink API to keep kernel > interface similar to the current OpenVPN protocol; > 2) implement "discrete" keys management via Netlink API and move "slots" > abstraction to the userspace application. > > Sabrina already mentioned that kernel will stuck with the slots > abstraction forever. This is due to the rule that user's API should not > be changed. > > Antonio, can we consider implementing the slot abstraction in the > userspace application? It will move complexity from the kernel to the > userspace, where it can be easy debugged and maintained. > > And since the keys management is part of the control plane. Keeping > abstraction closer to the control plane implementation will allow any > protocol modification, while keeping the kernel forward compatible. Thanks for chiming in! Right now the implementation is already pretty simple IMHO, there is not much complexity to move to userspace. And that's because ovpn in kernelspace "thinks" in terms of slots like userspace. If we want to go with a different approach, we have to add the "complexity" of converting abstraction from slots to anything being proposed here. I presume you're advocating about moving from slots to key IDs (and separate keys for TX and RX)? How do you see it working in detail? I.e. how many keys should we store and when should we purge them? (right now we just keep 2 pair of keys - aka slots - and the lifecycle is simply done) And why do you think it is simpler? One more advantage I see about using slots is that it's easy to match the user and kernel space states: at any point in time userspace can say "these are the pairs I have in my two slots" and kernelspace will just mirror that, without having to care about old IDs. Don't you think this approach is already pretty simple and less error prone with respect to going through an abstraction conversion? Thanks! Cheers, > >>>> Hence my proposal to keep the API the same, but rework the internal for >>>> better implementation. >>> >>> Yeah, that makes sense if you have that constraint imposed by the >>> existing userspace. >>> >>>> >>>>> >>>>> Maybe you can also rework the current code to look a bit like this >>>>> array-based proposal, and not give up the notion of slots, if that's >>>>> something strongly built into the core of openvpn: >>>>> >>>>> - primary/secondary in ovpn_crypto_state become slots[2] >>>>> - add a one-bit "primary" reference, used to look into the slots array >>>>> - swap just flips that "primary" bit >>>>> - TX uses slots[primary] >>>>> - RX keeps inspecting both slots (now slot[0] and slot[1] instead of >>>>> primary/secondary) looking for the correct keyid >>>>> - setkey/delkey still operate on primary/secondary, but need to figure >>>>> out whether slot[0] or slot[1] is primary/secondary based on >>>>> ->primary >>>>> >>>>> It's almost identical to the current code (and API), except you don't >>>>> need to reassign any pointers to swap keys, so it should avoid the >>>>> rekey issue. >>>> >>>> This approach sounds very close to what I was aiming for, but simpler >>>> because we have slots[2] instead of slots[8] (which means that >>>> primary_id >>>> can be just 'one-bit'). >>> >>> Yes. >>> >>>> The only downside is that upon RX we have to check both keys. >>>> However I don't think this is an issue as we have just 2 keys at most. >>>> >>>> I could even optimize the lookup by starting always from the primary >>>> key, as >>>> that's the one being used most of the time by the sender. >>> >>> Nice. >>> >>>> Ok, I will go with this approach you summarized here at the end. >>> >>> Ok. I think it should be a pretty minimal set of changes on top of the >>> existing code, especially if you hide the primary/secondary accesses >>> in inline helpers. >> >> I think so to - working on it. >
diff --git a/drivers/net/ovpn/Makefile b/drivers/net/ovpn/Makefile index 56bddc9bef83..ccdaeced1982 100644 --- a/drivers/net/ovpn/Makefile +++ b/drivers/net/ovpn/Makefile @@ -8,10 +8,13 @@ obj-$(CONFIG_OVPN) := ovpn.o ovpn-y += bind.o +ovpn-y += crypto.o +ovpn-y += crypto_aead.o ovpn-y += main.o ovpn-y += io.o ovpn-y += netlink.o ovpn-y += netlink-gen.o ovpn-y += peer.o +ovpn-y += pktid.o ovpn-y += socket.o ovpn-y += udp.o diff --git a/drivers/net/ovpn/crypto.c b/drivers/net/ovpn/crypto.c new file mode 100644 index 000000000000..0bceaef32f5b --- /dev/null +++ b/drivers/net/ovpn/crypto.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2024 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#include <linux/types.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <uapi/linux/ovpn.h> + +#include "ovpnstruct.h" +#include "main.h" +#include "packet.h" +#include "pktid.h" +#include "crypto_aead.h" +#include "crypto.h" + +static void ovpn_ks_destroy_rcu(struct rcu_head *head) +{ + struct ovpn_crypto_key_slot *ks; + + ks = container_of(head, struct ovpn_crypto_key_slot, rcu); + ovpn_aead_crypto_key_slot_destroy(ks); +} + +void ovpn_crypto_key_slot_release(struct kref *kref) +{ + struct ovpn_crypto_key_slot *ks; + + ks = container_of(kref, struct ovpn_crypto_key_slot, refcount); + call_rcu(&ks->rcu, ovpn_ks_destroy_rcu); +} + +/* can only be invoked when all peer references have been dropped (i.e. RCU + * release routine) + */ +void ovpn_crypto_state_release(struct ovpn_crypto_state *cs) +{ + struct ovpn_crypto_key_slot *ks; + + ks = rcu_access_pointer(cs->primary); + if (ks) { + RCU_INIT_POINTER(cs->primary, NULL); + ovpn_crypto_key_slot_put(ks); + } + + ks = rcu_access_pointer(cs->secondary); + if (ks) { + RCU_INIT_POINTER(cs->secondary, NULL); + ovpn_crypto_key_slot_put(ks); + } + + mutex_destroy(&cs->mutex); +} + +/* Reset the ovpn_crypto_state object in a way that is atomic + * to RCU readers. + */ +int ovpn_crypto_state_reset(struct ovpn_crypto_state *cs, + const struct ovpn_peer_key_reset *pkr) +{ + struct ovpn_crypto_key_slot *old = NULL, *new; + + if (pkr->slot != OVPN_KEY_SLOT_PRIMARY && + pkr->slot != OVPN_KEY_SLOT_SECONDARY) + return -EINVAL; + + new = ovpn_aead_crypto_key_slot_new(&pkr->key); + if (IS_ERR(new)) + return PTR_ERR(new); + + mutex_lock(&cs->mutex); + switch (pkr->slot) { + case OVPN_KEY_SLOT_PRIMARY: + old = rcu_replace_pointer(cs->primary, new, + lockdep_is_held(&cs->mutex)); + break; + case OVPN_KEY_SLOT_SECONDARY: + old = rcu_replace_pointer(cs->secondary, new, + lockdep_is_held(&cs->mutex)); + break; + } + mutex_unlock(&cs->mutex); + + if (old) + ovpn_crypto_key_slot_put(old); + + return 0; +} + +void ovpn_crypto_key_slot_delete(struct ovpn_crypto_state *cs, + enum ovpn_key_slot slot) +{ + struct ovpn_crypto_key_slot *ks = NULL; + + if (slot != OVPN_KEY_SLOT_PRIMARY && + slot != OVPN_KEY_SLOT_SECONDARY) { + pr_warn("Invalid slot to release: %u\n", slot); + return; + } + + mutex_lock(&cs->mutex); + switch (slot) { + case OVPN_KEY_SLOT_PRIMARY: + ks = rcu_replace_pointer(cs->primary, NULL, + lockdep_is_held(&cs->mutex)); + break; + case OVPN_KEY_SLOT_SECONDARY: + ks = rcu_replace_pointer(cs->secondary, NULL, + lockdep_is_held(&cs->mutex)); + break; + } + mutex_unlock(&cs->mutex); + + if (!ks) { + pr_debug("Key slot already released: %u\n", slot); + return; + } + + pr_debug("deleting key slot %u, key_id=%u\n", slot, ks->key_id); + ovpn_crypto_key_slot_put(ks); +} + +/* this swap is not atomic, but there will be a very short time frame where the + * old_secondary key won't be available. This should not be a big deal as most + * likely both peers are already using the new primary at this point. + */ +void ovpn_crypto_key_slots_swap(struct ovpn_crypto_state *cs) +{ + const struct ovpn_crypto_key_slot *old_primary, *old_secondary; + + mutex_lock(&cs->mutex); + + old_secondary = rcu_dereference_protected(cs->secondary, + lockdep_is_held(&cs->mutex)); + old_primary = rcu_replace_pointer(cs->primary, old_secondary, + lockdep_is_held(&cs->mutex)); + rcu_assign_pointer(cs->secondary, old_primary); + + pr_debug("key swapped: %u <-> %u\n", + old_primary ? old_primary->key_id : 0, + old_secondary ? old_secondary->key_id : 0); + + mutex_unlock(&cs->mutex); +} diff --git a/drivers/net/ovpn/crypto.h b/drivers/net/ovpn/crypto.h new file mode 100644 index 000000000000..228833db51a1 --- /dev/null +++ b/drivers/net/ovpn/crypto.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2024 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPNCRYPTO_H_ +#define _NET_OVPN_OVPNCRYPTO_H_ + +struct ovpn_peer; +struct ovpn_crypto_key_slot; + +/* info needed for both encrypt and decrypt directions */ +struct ovpn_key_direction { + const u8 *cipher_key; + size_t cipher_key_size; + const u8 *nonce_tail; /* only needed for GCM modes */ + size_t nonce_tail_size; /* only needed for GCM modes */ +}; + +/* all info for a particular symmetric key (primary or secondary) */ +struct ovpn_key_config { + enum ovpn_cipher_alg cipher_alg; + u8 key_id; + struct ovpn_key_direction encrypt; + struct ovpn_key_direction decrypt; +}; + +/* used to pass settings from netlink to the crypto engine */ +struct ovpn_peer_key_reset { + enum ovpn_key_slot slot; + struct ovpn_key_config key; +}; + +struct ovpn_crypto_key_slot { + u8 key_id; + + struct crypto_aead *encrypt; + struct crypto_aead *decrypt; + struct ovpn_nonce_tail nonce_tail_xmit; + struct ovpn_nonce_tail nonce_tail_recv; + + struct ovpn_pktid_recv pid_recv ____cacheline_aligned_in_smp; + struct ovpn_pktid_xmit pid_xmit ____cacheline_aligned_in_smp; + struct kref refcount; + struct rcu_head rcu; +}; + +struct ovpn_crypto_state { + struct ovpn_crypto_key_slot __rcu *primary; + struct ovpn_crypto_key_slot __rcu *secondary; + + /* protects primary and secondary slots */ + struct mutex mutex; +}; + +static inline bool ovpn_crypto_key_slot_hold(struct ovpn_crypto_key_slot *ks) +{ + return kref_get_unless_zero(&ks->refcount); +} + +static inline void ovpn_crypto_state_init(struct ovpn_crypto_state *cs) +{ + RCU_INIT_POINTER(cs->primary, NULL); + RCU_INIT_POINTER(cs->secondary, NULL); + mutex_init(&cs->mutex); +} + +static inline struct ovpn_crypto_key_slot * +ovpn_crypto_key_id_to_slot(const struct ovpn_crypto_state *cs, u8 key_id) +{ + struct ovpn_crypto_key_slot *ks; + + if (unlikely(!cs)) + return NULL; + + rcu_read_lock(); + ks = rcu_dereference(cs->primary); + if (ks && ks->key_id == key_id) { + if (unlikely(!ovpn_crypto_key_slot_hold(ks))) + ks = NULL; + goto out; + } + + ks = rcu_dereference(cs->secondary); + if (ks && ks->key_id == key_id) { + if (unlikely(!ovpn_crypto_key_slot_hold(ks))) + ks = NULL; + goto out; + } + + /* when both key slots are occupied but no matching key ID is found, ks + * has to be reset to NULL to avoid carrying a stale pointer + */ + ks = NULL; +out: + rcu_read_unlock(); + + return ks; +} + +static inline struct ovpn_crypto_key_slot * +ovpn_crypto_key_slot_primary(const struct ovpn_crypto_state *cs) +{ + struct ovpn_crypto_key_slot *ks; + + rcu_read_lock(); + ks = rcu_dereference(cs->primary); + if (unlikely(ks && !ovpn_crypto_key_slot_hold(ks))) + ks = NULL; + rcu_read_unlock(); + + return ks; +} + +void ovpn_crypto_key_slot_release(struct kref *kref); + +static inline void ovpn_crypto_key_slot_put(struct ovpn_crypto_key_slot *ks) +{ + kref_put(&ks->refcount, ovpn_crypto_key_slot_release); +} + +int ovpn_crypto_state_reset(struct ovpn_crypto_state *cs, + const struct ovpn_peer_key_reset *pkr); + +void ovpn_crypto_key_slot_delete(struct ovpn_crypto_state *cs, + enum ovpn_key_slot slot); + +void ovpn_crypto_state_release(struct ovpn_crypto_state *cs); + +void ovpn_crypto_key_slots_swap(struct ovpn_crypto_state *cs); + +#endif /* _NET_OVPN_OVPNCRYPTO_H_ */ diff --git a/drivers/net/ovpn/crypto_aead.c b/drivers/net/ovpn/crypto_aead.c new file mode 100644 index 000000000000..2e36ba8c4bfd --- /dev/null +++ b/drivers/net/ovpn/crypto_aead.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2024 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#include <crypto/aead.h> +#include <linux/skbuff.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/udp.h> + +#include "ovpnstruct.h" +#include "main.h" +#include "io.h" +#include "packet.h" +#include "pktid.h" +#include "crypto_aead.h" +#include "crypto.h" +#include "peer.h" +#include "proto.h" +#include "skb.h" + +#define AUTH_TAG_SIZE 16 + +static int ovpn_aead_encap_overhead(const struct ovpn_crypto_key_slot *ks) +{ + return OVPN_OP_SIZE_V2 + /* OP header size */ + 4 + /* Packet ID */ + crypto_aead_authsize(ks->encrypt); /* Auth Tag */ +} + +static void ovpn_aead_encrypt_done(void *data, int ret) +{ + struct sk_buff *skb = data; + + aead_request_free(ovpn_skb_cb(skb)->ctx->req); + ovpn_encrypt_post(skb, ret); +} + +int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, + struct sk_buff *skb) +{ + const unsigned int tag_size = crypto_aead_authsize(ks->encrypt); + const unsigned int head_size = ovpn_aead_encap_overhead(ks); + DECLARE_CRYPTO_WAIT(wait); + struct aead_request *req; + struct sk_buff *trailer; + struct scatterlist *sg; + u8 iv[NONCE_SIZE]; + int nfrags, ret; + u32 pktid, op; + + /* Sample AEAD header format: + * 48000001 00000005 7e7046bd 444a7e28 cc6387b1 64a4d6c1 380275a... + * [ OP32 ] [seq # ] [ auth tag ] [ payload ... ] + * [4-byte + * IV head] + */ + + /* check that there's enough headroom in the skb for packet + * encapsulation, after adding network header and encryption overhead + */ + if (unlikely(skb_cow_head(skb, OVPN_HEAD_ROOM + head_size))) + return -ENOBUFS; + + /* get number of skb frags and ensure that packet data is writable */ + nfrags = skb_cow_data(skb, 0, &trailer); + if (unlikely(nfrags < 0)) + return nfrags; + + if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2))) + return -ENOSPC; + + ovpn_skb_cb(skb)->ctx = kmalloc(sizeof(*ovpn_skb_cb(skb)->ctx), + GFP_ATOMIC); + if (unlikely(!ovpn_skb_cb(skb)->ctx)) + return -ENOMEM; + + sg = ovpn_skb_cb(skb)->ctx->sg; + + /* sg table: + * 0: op, wire nonce (AD, len=OVPN_OP_SIZE_V2+NONCE_WIRE_SIZE), + * 1, 2, 3, ..., n: payload, + * n+1: auth_tag (len=tag_size) + */ + sg_init_table(sg, nfrags + 2); + + /* build scatterlist to encrypt packet payload */ + ret = skb_to_sgvec_nomark(skb, sg + 1, 0, skb->len); + if (unlikely(nfrags != ret)) { + kfree(sg); + return -EINVAL; + } + + /* append auth_tag onto scatterlist */ + __skb_push(skb, tag_size); + sg_set_buf(sg + nfrags + 1, skb->data, tag_size); + + /* obtain packet ID, which is used both as a first + * 4 bytes of nonce and last 4 bytes of associated data. + */ + ret = ovpn_pktid_xmit_next(&ks->pid_xmit, &pktid); + if (unlikely(ret < 0)) { + kfree(ovpn_skb_cb(skb)->ctx); + return ret; + } + + /* concat 4 bytes packet id and 8 bytes nonce tail into 12 bytes + * nonce + */ + ovpn_pktid_aead_write(pktid, &ks->nonce_tail_xmit, iv); + + /* make space for packet id and push it to the front */ + __skb_push(skb, NONCE_WIRE_SIZE); + memcpy(skb->data, iv, NONCE_WIRE_SIZE); + + /* add packet op as head of additional data */ + op = ovpn_opcode_compose(OVPN_DATA_V2, ks->key_id, peer->id); + __skb_push(skb, OVPN_OP_SIZE_V2); + BUILD_BUG_ON(sizeof(op) != OVPN_OP_SIZE_V2); + *((__force __be32 *)skb->data) = htonl(op); + + /* AEAD Additional data */ + sg_set_buf(sg, skb->data, OVPN_OP_SIZE_V2 + NONCE_WIRE_SIZE); + + req = aead_request_alloc(ks->encrypt, GFP_ATOMIC); + if (unlikely(!req)) { + kfree(ovpn_skb_cb(skb)->ctx); + return -ENOMEM; + } + + /* setup async crypto operation */ + aead_request_set_tfm(req, ks->encrypt); + aead_request_set_callback(req, 0, ovpn_aead_encrypt_done, skb); + aead_request_set_crypt(req, sg, sg, skb->len - head_size, iv); + aead_request_set_ad(req, OVPN_OP_SIZE_V2 + NONCE_WIRE_SIZE); + + ovpn_skb_cb(skb)->ctx->peer = peer; + ovpn_skb_cb(skb)->ctx->req = req; + ovpn_skb_cb(skb)->ctx->ks = ks; + + /* encrypt it */ + return crypto_aead_encrypt(req); +} + +static void ovpn_aead_decrypt_done(void *data, int ret) +{ + struct sk_buff *skb = data; + + aead_request_free(ovpn_skb_cb(skb)->ctx->req); + ovpn_decrypt_post(skb, ret); +} + +int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, + struct sk_buff *skb) +{ + const unsigned int tag_size = crypto_aead_authsize(ks->decrypt); + int ret, payload_len, nfrags; + unsigned int payload_offset; + DECLARE_CRYPTO_WAIT(wait); + struct aead_request *req; + struct sk_buff *trailer; + struct scatterlist *sg; + unsigned int sg_len; + u8 iv[NONCE_SIZE]; + + payload_offset = OVPN_OP_SIZE_V2 + NONCE_WIRE_SIZE + tag_size; + payload_len = skb->len - payload_offset; + + /* sanity check on packet size, payload size must be >= 0 */ + if (unlikely(payload_len < 0)) + return -EINVAL; + + /* Prepare the skb data buffer to be accessed up until the auth tag. + * This is required because this area is directly mapped into the sg + * list. + */ + if (unlikely(!pskb_may_pull(skb, payload_offset))) + return -ENODATA; + + /* get number of skb frags and ensure that packet data is writable */ + nfrags = skb_cow_data(skb, 0, &trailer); + if (unlikely(nfrags < 0)) + return nfrags; + + if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2))) + return -ENOSPC; + + ovpn_skb_cb(skb)->ctx = kmalloc(sizeof(*ovpn_skb_cb(skb)->ctx), + GFP_ATOMIC); + if (unlikely(!ovpn_skb_cb(skb)->ctx)) + return -ENOMEM; + + sg = ovpn_skb_cb(skb)->ctx->sg; + + /* sg table: + * 0: op, wire nonce (AD, len=OVPN_OP_SIZE_V2+NONCE_WIRE_SIZE), + * 1, 2, 3, ..., n: payload, + * n+1: auth_tag (len=tag_size) + */ + sg_init_table(sg, nfrags + 2); + + /* packet op is head of additional data */ + sg_len = OVPN_OP_SIZE_V2 + NONCE_WIRE_SIZE; + sg_set_buf(sg, skb->data, sg_len); + + /* build scatterlist to decrypt packet payload */ + ret = skb_to_sgvec_nomark(skb, sg + 1, payload_offset, payload_len); + if (unlikely(nfrags != ret)) { + kfree(ovpn_skb_cb(skb)->ctx); + return -EINVAL; + } + + /* append auth_tag onto scatterlist */ + sg_set_buf(sg + nfrags + 1, skb->data + sg_len, tag_size); + + /* copy nonce into IV buffer */ + memcpy(iv, skb->data + OVPN_OP_SIZE_V2, NONCE_WIRE_SIZE); + memcpy(iv + NONCE_WIRE_SIZE, ks->nonce_tail_recv.u8, + sizeof(struct ovpn_nonce_tail)); + + req = aead_request_alloc(ks->decrypt, GFP_ATOMIC); + if (unlikely(!req)) { + kfree(ovpn_skb_cb(skb)->ctx); + return -ENOMEM; + } + + /* setup async crypto operation */ + aead_request_set_tfm(req, ks->decrypt); + aead_request_set_callback(req, 0, ovpn_aead_decrypt_done, skb); + aead_request_set_crypt(req, sg, sg, payload_len + tag_size, iv); + + aead_request_set_ad(req, NONCE_WIRE_SIZE + OVPN_OP_SIZE_V2); + + ovpn_skb_cb(skb)->ctx->payload_offset = payload_offset; + ovpn_skb_cb(skb)->ctx->peer = peer; + ovpn_skb_cb(skb)->ctx->req = req; + ovpn_skb_cb(skb)->ctx->ks = ks; + + /* decrypt it */ + return crypto_aead_decrypt(req); +} + +/* Initialize a struct crypto_aead object */ +struct crypto_aead *ovpn_aead_init(const char *title, const char *alg_name, + const unsigned char *key, + unsigned int keylen) +{ + struct crypto_aead *aead; + int ret; + + aead = crypto_alloc_aead(alg_name, 0, 0); + if (IS_ERR(aead)) { + ret = PTR_ERR(aead); + pr_err("%s crypto_alloc_aead failed, err=%d\n", title, ret); + aead = NULL; + goto error; + } + + ret = crypto_aead_setkey(aead, key, keylen); + if (ret) { + pr_err("%s crypto_aead_setkey size=%u failed, err=%d\n", title, + keylen, ret); + goto error; + } + + ret = crypto_aead_setauthsize(aead, AUTH_TAG_SIZE); + if (ret) { + pr_err("%s crypto_aead_setauthsize failed, err=%d\n", title, + ret); + goto error; + } + + /* basic AEAD assumption */ + if (crypto_aead_ivsize(aead) != NONCE_SIZE) { + pr_err("%s IV size must be %d\n", title, NONCE_SIZE); + ret = -EINVAL; + goto error; + } + + pr_debug("********* Cipher %s (%s)\n", alg_name, title); + pr_debug("*** IV size=%u\n", crypto_aead_ivsize(aead)); + pr_debug("*** req size=%u\n", crypto_aead_reqsize(aead)); + pr_debug("*** block size=%u\n", crypto_aead_blocksize(aead)); + pr_debug("*** auth size=%u\n", crypto_aead_authsize(aead)); + pr_debug("*** alignmask=0x%x\n", crypto_aead_alignmask(aead)); + + return aead; + +error: + crypto_free_aead(aead); + return ERR_PTR(ret); +} + +void ovpn_aead_crypto_key_slot_destroy(struct ovpn_crypto_key_slot *ks) +{ + if (!ks) + return; + + crypto_free_aead(ks->encrypt); + crypto_free_aead(ks->decrypt); + kfree(ks); +} + +struct ovpn_crypto_key_slot * +ovpn_aead_crypto_key_slot_new(const struct ovpn_key_config *kc) +{ + struct ovpn_crypto_key_slot *ks = NULL; + const char *alg_name; + int ret; + + /* validate crypto alg */ + switch (kc->cipher_alg) { + case OVPN_CIPHER_ALG_AES_GCM: + alg_name = "gcm(aes)"; + break; + case OVPN_CIPHER_ALG_CHACHA20_POLY1305: + alg_name = "rfc7539(chacha20,poly1305)"; + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + if (sizeof(struct ovpn_nonce_tail) != kc->encrypt.nonce_tail_size || + sizeof(struct ovpn_nonce_tail) != kc->decrypt.nonce_tail_size) + return ERR_PTR(-EINVAL); + + /* build the key slot */ + ks = kmalloc(sizeof(*ks), GFP_KERNEL); + if (!ks) + return ERR_PTR(-ENOMEM); + + ks->encrypt = NULL; + ks->decrypt = NULL; + kref_init(&ks->refcount); + ks->key_id = kc->key_id; + + ks->encrypt = ovpn_aead_init("encrypt", alg_name, + kc->encrypt.cipher_key, + kc->encrypt.cipher_key_size); + if (IS_ERR(ks->encrypt)) { + ret = PTR_ERR(ks->encrypt); + ks->encrypt = NULL; + goto destroy_ks; + } + + ks->decrypt = ovpn_aead_init("decrypt", alg_name, + kc->decrypt.cipher_key, + kc->decrypt.cipher_key_size); + if (IS_ERR(ks->decrypt)) { + ret = PTR_ERR(ks->decrypt); + ks->decrypt = NULL; + goto destroy_ks; + } + + memcpy(ks->nonce_tail_xmit.u8, kc->encrypt.nonce_tail, + sizeof(struct ovpn_nonce_tail)); + memcpy(ks->nonce_tail_recv.u8, kc->decrypt.nonce_tail, + sizeof(struct ovpn_nonce_tail)); + + /* init packet ID generation/validation */ + ovpn_pktid_xmit_init(&ks->pid_xmit); + ovpn_pktid_recv_init(&ks->pid_recv); + + return ks; + +destroy_ks: + ovpn_aead_crypto_key_slot_destroy(ks); + return ERR_PTR(ret); +} diff --git a/drivers/net/ovpn/crypto_aead.h b/drivers/net/ovpn/crypto_aead.h new file mode 100644 index 000000000000..77ee8141599b --- /dev/null +++ b/drivers/net/ovpn/crypto_aead.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2024 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPNAEAD_H_ +#define _NET_OVPN_OVPNAEAD_H_ + +#include "crypto.h" + +#include <asm/types.h> +#include <linux/skbuff.h> + +struct crypto_aead *ovpn_aead_init(const char *title, const char *alg_name, + const unsigned char *key, + unsigned int keylen); + +int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, + struct sk_buff *skb); +int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, + struct sk_buff *skb); + +struct ovpn_crypto_key_slot * +ovpn_aead_crypto_key_slot_new(const struct ovpn_key_config *kc); +void ovpn_aead_crypto_key_slot_destroy(struct ovpn_crypto_key_slot *ks); + +#endif /* _NET_OVPN_OVPNAEAD_H_ */ diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c index ad986c66dbdf..2b9311566971 100644 --- a/drivers/net/ovpn/io.c +++ b/drivers/net/ovpn/io.c @@ -15,6 +15,8 @@ #include "ovpnstruct.h" #include "peer.h" #include "io.h" +#include "crypto.h" +#include "crypto_aead.h" #include "netlink.h" #include "proto.h" #include "udp.h" @@ -54,39 +56,122 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb) dev_sw_netstats_rx_add(peer->ovpn->dev, skb->len); } -static void ovpn_decrypt_post(struct sk_buff *skb, int ret) +void ovpn_decrypt_post(struct sk_buff *skb, int ret) { - struct ovpn_peer *peer = ovpn_skb_cb(skb)->peer; + struct ovpn_crypto_key_slot *ks = ovpn_skb_cb(skb)->ctx->ks; + struct ovpn_peer *peer = ovpn_skb_cb(skb)->ctx->peer; + __be16 proto; + __be32 *pid; - if (unlikely(ret < 0)) + /* crypto is happening asyncronously. this function will be called + * again later by the crypto callback with a proper return code + */ + if (unlikely(ret == -EINPROGRESS)) + return; + + if (unlikely(ret < 0)) { + net_err_ratelimited("%s: error during decryption for peer %u, key-id %u: %d\n", + peer->ovpn->dev->name, peer->id, ks->key_id, + ret); + goto drop; + } + + /* PID sits after the op */ + pid = (__force __be32 *)(skb->data + OVPN_OP_SIZE_V2); + ret = ovpn_pktid_recv(&ks->pid_recv, ntohl(*pid), 0); + if (unlikely(ret < 0)) { + net_err_ratelimited("%s: PKT ID RX error: %d\n", + peer->ovpn->dev->name, ret); goto drop; + } + /* point to encapsulated IP packet */ + __skb_pull(skb, ovpn_skb_cb(skb)->ctx->payload_offset); + + /* check if this is a valid datapacket that has to be delivered to the + * ovpn interface + */ + skb_reset_network_header(skb); + proto = ovpn_ip_check_protocol(skb); + if (unlikely(!proto)) { + /* check if null packet */ + if (unlikely(!pskb_may_pull(skb, 1))) { + net_info_ratelimited("%s: NULL packet received from peer %u\n", + peer->ovpn->dev->name, peer->id); + goto drop; + } + + net_info_ratelimited("%s: unsupported protocol received from peer %u\n", + peer->ovpn->dev->name, peer->id); + goto drop; + } + skb->protocol = proto; + + /* perform Reverse Path Filtering (RPF) */ + if (unlikely(!ovpn_peer_check_by_src(peer->ovpn, skb, peer))) { + if (skb_protocol_to_family(skb) == AF_INET6) + net_dbg_ratelimited("%s: RPF dropped packet from peer %u, src: %pI6c\n", + peer->ovpn->dev->name, peer->id, + &ipv6_hdr(skb)->saddr); + else + net_dbg_ratelimited("%s: RPF dropped packet from peer %u, src: %pI4\n", + peer->ovpn->dev->name, peer->id, + &ip_hdr(skb)->saddr); + goto drop; + } + + kfree(ovpn_skb_cb(skb)->ctx); ovpn_netdev_write(peer, skb); /* skb is passed to upper layer - don't free it */ skb = NULL; drop: - if (unlikely(skb)) + if (unlikely(skb)) { dev_core_stats_rx_dropped_inc(peer->ovpn->dev); + kfree(ovpn_skb_cb(skb)->ctx); + } kfree_skb(skb); + ovpn_crypto_key_slot_put(ks); ovpn_peer_put(peer); } /* pick next packet from RX queue, decrypt and forward it to the device */ void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb) { - ovpn_skb_cb(skb)->peer = peer; - ovpn_decrypt_post(skb, 0); + struct ovpn_crypto_key_slot *ks; + u8 key_id; + + /* get the key slot matching the key ID in the received packet */ + key_id = ovpn_key_id_from_skb(skb); + ks = ovpn_crypto_key_id_to_slot(&peer->crypto, key_id); + if (unlikely(!ks)) { + net_info_ratelimited("%s: no available key for peer %u, key-id: %u\n", + peer->ovpn->dev->name, peer->id, key_id); + dev_core_stats_rx_dropped_inc(peer->ovpn->dev); + kfree_skb(skb); + return; + } + + ovpn_decrypt_post(skb, ovpn_aead_decrypt(peer, ks, skb)); } -static void ovpn_encrypt_post(struct sk_buff *skb, int ret) +void ovpn_encrypt_post(struct sk_buff *skb, int ret) { - struct ovpn_peer *peer = ovpn_skb_cb(skb)->peer; + struct ovpn_crypto_key_slot *ks = ovpn_skb_cb(skb)->ctx->ks; + struct ovpn_peer *peer = ovpn_skb_cb(skb)->ctx->peer; + + /* encryption is happening asynchronously. This function will be + * called later by the crypto callback with a proper return value + */ + if (unlikely(ret == -EINPROGRESS)) + return; if (unlikely(ret < 0)) goto err; skb_mark_not_on_list(skb); + kfree(ovpn_skb_cb(skb)->ctx); + switch (peer->sock->sock->sk->sk_protocol) { case IPPROTO_UDP: ovpn_udp_send_skb(peer->ovpn, peer, skb); @@ -100,14 +185,31 @@ static void ovpn_encrypt_post(struct sk_buff *skb, int ret) err: if (unlikely(skb)) { dev_core_stats_tx_dropped_inc(peer->ovpn->dev); - kfree_skb(skb); + kfree(ovpn_skb_cb(skb)->ctx); } + kfree_skb(skb); + ovpn_crypto_key_slot_put(ks); ovpn_peer_put(peer); } static bool ovpn_encrypt_one(struct ovpn_peer *peer, struct sk_buff *skb) { - ovpn_skb_cb(skb)->peer = peer; + struct ovpn_crypto_key_slot *ks; + + if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb))) { + net_warn_ratelimited("%s: cannot compute checksum for outgoing packet\n", + peer->ovpn->dev->name); + return false; + } + + /* get primary key to be used for encrypting data */ + ks = ovpn_crypto_key_slot_primary(&peer->crypto); + if (unlikely(!ks)) { + net_warn_ratelimited("%s: error while retrieving primary key slot for peer %u\n", + peer->ovpn->dev->name, peer->id); + return false; + } /* take a reference to the peer because the crypto code may run async. * ovpn_encrypt_post() will release it upon completion @@ -117,7 +219,7 @@ static bool ovpn_encrypt_one(struct ovpn_peer *peer, struct sk_buff *skb) return false; } - ovpn_encrypt_post(skb, 0); + ovpn_encrypt_post(skb, ovpn_aead_encrypt(peer, ks, skb)); return true; } diff --git a/drivers/net/ovpn/io.h b/drivers/net/ovpn/io.h index 9667a0a470e0..ad741552d742 100644 --- a/drivers/net/ovpn/io.h +++ b/drivers/net/ovpn/io.h @@ -14,4 +14,7 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev); void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb); +void ovpn_encrypt_post(struct sk_buff *skb, int ret); +void ovpn_decrypt_post(struct sk_buff *skb, int ret); + #endif /* _NET_OVPN_OVPN_H_ */ diff --git a/drivers/net/ovpn/packet.h b/drivers/net/ovpn/packet.h index 7ed146f5932a..e14c9bf464f7 100644 --- a/drivers/net/ovpn/packet.h +++ b/drivers/net/ovpn/packet.h @@ -10,7 +10,7 @@ #ifndef _NET_OVPN_PACKET_H_ #define _NET_OVPN_PACKET_H_ -/* When the OpenVPN protocol is ran in AEAD mode, use +/* When the OpenVPN protocol is run in AEAD mode, use * the OpenVPN packet ID as the AEAD nonce: * * 00000005 521c3b01 4308c041 diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c index 6bde4bd9395b..212d1210bc56 100644 --- a/drivers/net/ovpn/peer.c +++ b/drivers/net/ovpn/peer.c @@ -12,6 +12,8 @@ #include "ovpnstruct.h" #include "bind.h" +#include "pktid.h" +#include "crypto.h" #include "io.h" #include "main.h" #include "netlink.h" @@ -42,6 +44,7 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_struct *ovpn, u32 id) peer->vpn_addrs.ipv6 = in6addr_any; RCU_INIT_POINTER(peer->bind, NULL); + ovpn_crypto_state_init(&peer->crypto); spin_lock_init(&peer->lock); kref_init(&peer->refcount); @@ -67,6 +70,7 @@ static void ovpn_peer_release(struct ovpn_peer *peer) if (peer->sock) ovpn_socket_put(peer->sock); + ovpn_crypto_state_release(&peer->crypto); ovpn_bind_reset(peer, NULL); dst_cache_destroy(&peer->dst_cache); } @@ -277,6 +281,31 @@ struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_struct *ovpn, return peer; } +/** + * ovpn_peer_check_by_src - check that skb source is routed via peer + * @ovpn: the openvpn instance to search + * @skb: the packet to extract source address from + * @peer: the peer to check against the source address + * + * Return: true if the peer is matching or false otherwise + */ +bool ovpn_peer_check_by_src(struct ovpn_struct *ovpn, struct sk_buff *skb, + struct ovpn_peer *peer) +{ + bool match = false; + + if (ovpn->mode == OVPN_MODE_P2P) { + /* in P2P mode, no matter the destination, packets are always + * sent to the single peer listening on the other side + */ + rcu_read_lock(); + match = (peer == rcu_dereference(ovpn->peer)); + rcu_read_unlock(); + } + + return match; +} + /** * ovpn_peer_add_p2p - add peer to related tables in a P2P instance * @ovpn: the instance to add the peer to diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h index 37de5aff54a8..70d92cd5bd63 100644 --- a/drivers/net/ovpn/peer.h +++ b/drivers/net/ovpn/peer.h @@ -11,6 +11,8 @@ #define _NET_OVPN_OVPNPEER_H_ #include "bind.h" +#include "pktid.h" +#include "crypto.h" #include "socket.h" #include <net/dst_cache.h> @@ -24,6 +26,7 @@ * @vpn_addrs.ipv4: IPv4 assigned to peer on the tunnel * @vpn_addrs.ipv6: IPv6 assigned to peer on the tunnel * @sock: the socket being used to talk to this peer + * @crypto: the crypto configuration (ciphers, keys, etc..) * @dst_cache: cache for dst_entry used to send to peer * @bind: remote peer binding * @halt: true if ovpn_peer_mark_delete was called @@ -41,6 +44,7 @@ struct ovpn_peer { struct in6_addr ipv6; } vpn_addrs; struct ovpn_socket *sock; + struct ovpn_crypto_state crypto; struct dst_cache dst_cache; struct ovpn_bind __rcu *bind; bool halt; @@ -83,5 +87,7 @@ struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_struct *ovpn, struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_struct *ovpn, u32 peer_id); struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_struct *ovpn, struct sk_buff *skb); +bool ovpn_peer_check_by_src(struct ovpn_struct *ovpn, struct sk_buff *skb, + struct ovpn_peer *peer); #endif /* _NET_OVPN_OVPNPEER_H_ */ diff --git a/drivers/net/ovpn/pktid.c b/drivers/net/ovpn/pktid.c new file mode 100644 index 000000000000..96dc87635670 --- /dev/null +++ b/drivers/net/ovpn/pktid.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2024 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + * James Yonan <james@openvpn.net> + */ + +#include <linux/atomic.h> +#include <linux/jiffies.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/types.h> + +#include "ovpnstruct.h" +#include "main.h" +#include "packet.h" +#include "pktid.h" + +void ovpn_pktid_xmit_init(struct ovpn_pktid_xmit *pid) +{ + atomic64_set(&pid->seq_num, 1); +} + +void ovpn_pktid_recv_init(struct ovpn_pktid_recv *pr) +{ + memset(pr, 0, sizeof(*pr)); + spin_lock_init(&pr->lock); +} + +/* Packet replay detection. + * Allows ID backtrack of up to REPLAY_WINDOW_SIZE - 1. + */ +int ovpn_pktid_recv(struct ovpn_pktid_recv *pr, u32 pkt_id, u32 pkt_time) +{ + const unsigned long now = jiffies; + int ret; + + /* ID must not be zero */ + if (unlikely(pkt_id == 0)) + return -EINVAL; + + spin_lock_bh(&pr->lock); + + /* expire backtracks at or below pr->id after PKTID_RECV_EXPIRE time */ + if (unlikely(time_after_eq(now, pr->expire))) + pr->id_floor = pr->id; + + /* time changed? */ + if (unlikely(pkt_time != pr->time)) { + if (pkt_time > pr->time) { + /* time moved forward, accept */ + pr->base = 0; + pr->extent = 0; + pr->id = 0; + pr->time = pkt_time; + pr->id_floor = 0; + } else { + /* time moved backward, reject */ + ret = -ETIME; + goto out; + } + } + + if (likely(pkt_id == pr->id + 1)) { + /* well-formed ID sequence (incremented by 1) */ + pr->base = REPLAY_INDEX(pr->base, -1); + pr->history[pr->base / 8] |= (1 << (pr->base % 8)); + if (pr->extent < REPLAY_WINDOW_SIZE) + ++pr->extent; + pr->id = pkt_id; + } else if (pkt_id > pr->id) { + /* ID jumped forward by more than one */ + const unsigned int delta = pkt_id - pr->id; + + if (delta < REPLAY_WINDOW_SIZE) { + unsigned int i; + + pr->base = REPLAY_INDEX(pr->base, -delta); + pr->history[pr->base / 8] |= (1 << (pr->base % 8)); + pr->extent += delta; + if (pr->extent > REPLAY_WINDOW_SIZE) + pr->extent = REPLAY_WINDOW_SIZE; + for (i = 1; i < delta; ++i) { + unsigned int newb = REPLAY_INDEX(pr->base, i); + + pr->history[newb / 8] &= ~BIT(newb % 8); + } + } else { + pr->base = 0; + pr->extent = REPLAY_WINDOW_SIZE; + memset(pr->history, 0, sizeof(pr->history)); + pr->history[0] = 1; + } + pr->id = pkt_id; + } else { + /* ID backtrack */ + const unsigned int delta = pr->id - pkt_id; + + if (delta > pr->max_backtrack) + pr->max_backtrack = delta; + if (delta < pr->extent) { + if (pkt_id > pr->id_floor) { + const unsigned int ri = REPLAY_INDEX(pr->base, + delta); + u8 *p = &pr->history[ri / 8]; + const u8 mask = (1 << (ri % 8)); + + if (*p & mask) { + ret = -EINVAL; + goto out; + } + *p |= mask; + } else { + ret = -EINVAL; + goto out; + } + } else { + ret = -EINVAL; + goto out; + } + } + + pr->expire = now + PKTID_RECV_EXPIRE; + ret = 0; +out: + spin_unlock_bh(&pr->lock); + return ret; +} diff --git a/drivers/net/ovpn/pktid.h b/drivers/net/ovpn/pktid.h new file mode 100644 index 000000000000..fe02f0667e1a --- /dev/null +++ b/drivers/net/ovpn/pktid.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2024 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + * James Yonan <james@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPNPKTID_H_ +#define _NET_OVPN_OVPNPKTID_H_ + +#include "packet.h" + +/* If no packets received for this length of time, set a backtrack floor + * at highest received packet ID thus far. + */ +#define PKTID_RECV_EXPIRE (30 * HZ) + +/* Packet-ID state for transmitter */ +struct ovpn_pktid_xmit { + atomic64_t seq_num; +}; + +/* replay window sizing in bytes = 2^REPLAY_WINDOW_ORDER */ +#define REPLAY_WINDOW_ORDER 8 + +#define REPLAY_WINDOW_BYTES BIT(REPLAY_WINDOW_ORDER) +#define REPLAY_WINDOW_SIZE (REPLAY_WINDOW_BYTES * 8) +#define REPLAY_INDEX(base, i) (((base) + (i)) & (REPLAY_WINDOW_SIZE - 1)) + +/* Packet-ID state for receiver. + * Other than lock member, can be zeroed to initialize. + */ +struct ovpn_pktid_recv { + /* "sliding window" bitmask of recent packet IDs received */ + u8 history[REPLAY_WINDOW_BYTES]; + /* bit position of deque base in history */ + unsigned int base; + /* extent (in bits) of deque in history */ + unsigned int extent; + /* expiration of history in jiffies */ + unsigned long expire; + /* highest sequence number received */ + u32 id; + /* highest time stamp received */ + u32 time; + /* we will only accept backtrack IDs > id_floor */ + u32 id_floor; + unsigned int max_backtrack; + /* protects entire pktd ID state */ + spinlock_t lock; +}; + +/* Get the next packet ID for xmit */ +static inline int ovpn_pktid_xmit_next(struct ovpn_pktid_xmit *pid, u32 *pktid) +{ + const s64 seq_num = atomic64_fetch_add_unless(&pid->seq_num, 1, + 0x100000000LL); + /* when the 32bit space is over, we return an error because the packet + * ID is used to create the cipher IV and we do not want to reuse the + * same value more than once + */ + if (unlikely(seq_num == 0x100000000LL)) + return -ERANGE; + + *pktid = (u32)seq_num; + + return 0; +} + +/* Write 12-byte AEAD IV to dest */ +static inline void ovpn_pktid_aead_write(const u32 pktid, + const struct ovpn_nonce_tail *nt, + unsigned char *dest) +{ + *(__force __be32 *)(dest) = htonl(pktid); + BUILD_BUG_ON(4 + sizeof(struct ovpn_nonce_tail) != NONCE_SIZE); + memcpy(dest + 4, nt->u8, sizeof(struct ovpn_nonce_tail)); +} + +void ovpn_pktid_xmit_init(struct ovpn_pktid_xmit *pid); +void ovpn_pktid_recv_init(struct ovpn_pktid_recv *pr); + +int ovpn_pktid_recv(struct ovpn_pktid_recv *pr, u32 pkt_id, u32 pkt_time); + +#endif /* _NET_OVPN_OVPNPKTID_H_ */ diff --git a/drivers/net/ovpn/proto.h b/drivers/net/ovpn/proto.h index 69604cf26bbf..32af6b8e5743 100644 --- a/drivers/net/ovpn/proto.h +++ b/drivers/net/ovpn/proto.h @@ -72,4 +72,35 @@ static inline u32 ovpn_peer_id_from_skb(const struct sk_buff *skb, u16 offset) return ntohl(*(__be32 *)(skb->data + offset)) & OVPN_PEER_ID_MASK; } +/** + * ovpn_key_id_from_skb - extract key ID from the skb head + * @skb: the packet to extract the key ID code from + * + * Note: this function assumes that the skb head was pulled enough + * to access the first byte. + * + * Return: the key ID + */ +static inline u8 ovpn_key_id_from_skb(const struct sk_buff *skb) +{ + return *skb->data & OVPN_KEY_ID_MASK; +} + +/** + * ovpn_opcode_compose - combine OP code, key ID and peer ID to wire format + * @opcode: the OP code + * @key_id: the key ID + * @peer_id: the peer ID + * + * Return: a 4 bytes integer obtained combining all input values following the + * OpenVPN wire format. This integer can then be written to the packet header. + */ +static inline u32 ovpn_opcode_compose(u8 opcode, u8 key_id, u32 peer_id) +{ + const u8 op = (opcode << OVPN_OPCODE_SHIFT) | + (key_id & OVPN_KEY_ID_MASK); + + return (op << 24) | (peer_id & OVPN_PEER_ID_MASK); +} + #endif /* _NET_OVPN_OVPNPROTO_H_ */ diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h index e070fe6f448c..3f94efae20fb 100644 --- a/drivers/net/ovpn/skb.h +++ b/drivers/net/ovpn/skb.h @@ -17,8 +17,17 @@ #include <linux/socket.h> #include <linux/types.h> -struct ovpn_cb { +struct ovpn_cb_ctx { struct ovpn_peer *peer; + struct sk_buff *skb; + struct ovpn_crypto_key_slot *ks; + struct aead_request *req; + struct scatterlist sg[MAX_SKB_FRAGS + 2]; + unsigned int payload_offset; +}; + +struct ovpn_cb { + struct ovpn_cb_ctx *ctx; }; static inline struct ovpn_cb *ovpn_skb_cb(struct sk_buff *skb)
This change implements encryption/decryption and encapsulation/decapsulation of OpenVPN packets. Support for generic crypto state is added along with a wrapper for the AEAD crypto kernel API. Signed-off-by: Antonio Quartulli <antonio@openvpn.net> --- drivers/net/ovpn/Makefile | 3 + drivers/net/ovpn/crypto.c | 149 +++++++++++++ drivers/net/ovpn/crypto.h | 136 ++++++++++++ drivers/net/ovpn/crypto_aead.c | 374 +++++++++++++++++++++++++++++++++ drivers/net/ovpn/crypto_aead.h | 31 +++ drivers/net/ovpn/io.c | 124 ++++++++++- drivers/net/ovpn/io.h | 3 + drivers/net/ovpn/packet.h | 2 +- drivers/net/ovpn/peer.c | 29 +++ drivers/net/ovpn/peer.h | 6 + drivers/net/ovpn/pktid.c | 130 ++++++++++++ drivers/net/ovpn/pktid.h | 87 ++++++++ drivers/net/ovpn/proto.h | 31 +++ drivers/net/ovpn/skb.h | 11 +- 14 files changed, 1103 insertions(+), 13 deletions(-) create mode 100644 drivers/net/ovpn/crypto.c create mode 100644 drivers/net/ovpn/crypto.h create mode 100644 drivers/net/ovpn/crypto_aead.c create mode 100644 drivers/net/ovpn/crypto_aead.h create mode 100644 drivers/net/ovpn/pktid.c create mode 100644 drivers/net/ovpn/pktid.h