Message ID | 20181105232526.173947-11-ebiggers@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Herbert Xu |
Headers | show |
Series | crypto: Adiantum support | expand |
On 6 November 2018 at 00:25, Eric Biggers <ebiggers@kernel.org> wrote: > From: Eric Biggers <ebiggers@google.com> > > In preparation for exposing a low-level Poly1305 API which implements > the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305 > MAC and supports block-aligned inputs only, create structures > poly1305_key and poly1305_state which hold the limbs of the Poly1305 > "r" key and accumulator, respectively. > > Signed-off-by: Eric Biggers <ebiggers@google.com> > --- > arch/x86/crypto/poly1305_glue.c | 20 +++++++------ > crypto/poly1305_generic.c | 52 ++++++++++++++++----------------- > include/crypto/poly1305.h | 12 ++++++-- > 3 files changed, 47 insertions(+), 37 deletions(-) > > diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c > index f012b7e28ad1..88cc01506c84 100644 > --- a/arch/x86/crypto/poly1305_glue.c > +++ b/arch/x86/crypto/poly1305_glue.c > @@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, > if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { > if (unlikely(!sctx->wset)) { > if (!sctx->uset) { > - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); > - poly1305_simd_mult(sctx->u, dctx->r); > + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); > + poly1305_simd_mult(sctx->u, dctx->r.r); > sctx->uset = true; > } > memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); > - poly1305_simd_mult(sctx->u + 5, dctx->r); > + poly1305_simd_mult(sctx->u + 5, dctx->r.r); > memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); > - poly1305_simd_mult(sctx->u + 10, dctx->r); > + poly1305_simd_mult(sctx->u + 10, dctx->r.r); > sctx->wset = true; > } > blocks = srclen / (POLY1305_BLOCK_SIZE * 4); > - poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u); > + poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, > + sctx->u); > src += POLY1305_BLOCK_SIZE * 4 * blocks; > srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; > } > #endif > if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { > if 
(unlikely(!sctx->uset)) { > - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); > - poly1305_simd_mult(sctx->u, dctx->r); > + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); > + poly1305_simd_mult(sctx->u, dctx->r.r); > sctx->uset = true; > } > blocks = srclen / (POLY1305_BLOCK_SIZE * 2); > - poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u); > + poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, > + sctx->u); > src += POLY1305_BLOCK_SIZE * 2 * blocks; > srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; > } > if (srclen >= POLY1305_BLOCK_SIZE) { > - poly1305_block_sse2(dctx->h, src, dctx->r, 1); > + poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); > srclen -= POLY1305_BLOCK_SIZE; > } > return srclen; > diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c > index 47d3a6b83931..a23173f351b7 100644 > --- a/crypto/poly1305_generic.c > +++ b/crypto/poly1305_generic.c > @@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc) > { > struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); > > - memset(dctx->h, 0, sizeof(dctx->h)); > + memset(dctx->h.h, 0, sizeof(dctx->h.h)); > dctx->buflen = 0; > dctx->rset = false; > dctx->sset = false; > @@ -50,11 +50,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_init); > static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) > { > /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ > - dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; > - dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; > - dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; > - dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; > - dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; > + dctx->r.r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; > + dctx->r.r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; > + dctx->r.r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; > + dctx->r.r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; > + dctx->r.r[4] = 
(get_unaligned_le32(key + 12) >> 8) & 0x00fffff; > } > > static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) > @@ -107,22 +107,22 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, > srclen = datalen; > } > > - r0 = dctx->r[0]; > - r1 = dctx->r[1]; > - r2 = dctx->r[2]; > - r3 = dctx->r[3]; > - r4 = dctx->r[4]; > + r0 = dctx->r.r[0]; > + r1 = dctx->r.r[1]; > + r2 = dctx->r.r[2]; > + r3 = dctx->r.r[3]; > + r4 = dctx->r.r[4]; > > s1 = r1 * 5; > s2 = r2 * 5; > s3 = r3 * 5; > s4 = r4 * 5; > > - h0 = dctx->h[0]; > - h1 = dctx->h[1]; > - h2 = dctx->h[2]; > - h3 = dctx->h[3]; > - h4 = dctx->h[4]; > + h0 = dctx->h.h[0]; > + h1 = dctx->h.h[1]; > + h2 = dctx->h.h[2]; > + h3 = dctx->h.h[3]; > + h4 = dctx->h.h[4]; > > while (likely(srclen >= POLY1305_BLOCK_SIZE)) { > > @@ -157,11 +157,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, > srclen -= POLY1305_BLOCK_SIZE; > } > > - dctx->h[0] = h0; > - dctx->h[1] = h1; > - dctx->h[2] = h2; > - dctx->h[3] = h3; > - dctx->h[4] = h4; > + dctx->h.h[0] = h0; > + dctx->h.h[1] = h1; > + dctx->h.h[2] = h2; > + dctx->h.h[3] = h3; > + dctx->h.h[4] = h4; > > return srclen; > } > @@ -220,11 +220,11 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) > } > > /* fully carry h */ > - h0 = dctx->h[0]; > - h1 = dctx->h[1]; > - h2 = dctx->h[2]; > - h3 = dctx->h[3]; > - h4 = dctx->h[4]; > + h0 = dctx->h.h[0]; > + h1 = dctx->h.h[1]; > + h2 = dctx->h.h[2]; > + h3 = dctx->h.h[3]; > + h4 = dctx->h.h[4]; > > h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; > h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; > diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h > index f718a19da82f..493244c46664 100644 > --- a/include/crypto/poly1305.h > +++ b/include/crypto/poly1305.h > @@ -13,13 +13,21 @@ > #define POLY1305_KEY_SIZE 32 > #define POLY1305_DIGEST_SIZE 16 > > +struct poly1305_key { > + u32 r[5]; /* key, base 2^26 */ > +}; > + > +struct poly1305_state { > + u32 h[5]; /* accumulator, base 
2^26 */ > +}; > + Sorry to bikeshed, but wouldn't it make more sense to have single definition struct poly1305_val { u32 v[5]; /* base 2^26 */ }; and have 'key' and 'accum[ulator]' fields of type struct poly1305_val in the struct below? > struct poly1305_desc_ctx { > /* key */ > - u32 r[5]; > + struct poly1305_key r; > /* finalize key */ > u32 s[4]; > /* accumulator */ > - u32 h[5]; > + struct poly1305_state h; > /* partial buffer */ > u8 buf[POLY1305_BLOCK_SIZE]; > /* bytes used in partial buffer */ > -- > 2.19.1.930.g4563a0d9d0-goog >
Hi Ard, On Tue, Nov 06, 2018 at 03:28:24PM +0100, Ard Biesheuvel wrote: > On 6 November 2018 at 00:25, Eric Biggers <ebiggers@kernel.org> wrote: > > From: Eric Biggers <ebiggers@google.com> > > > > In preparation for exposing a low-level Poly1305 API which implements > > the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305 > > MAC and supports block-aligned inputs only, create structures > > poly1305_key and poly1305_state which hold the limbs of the Poly1305 > > "r" key and accumulator, respectively. > > > > Signed-off-by: Eric Biggers <ebiggers@google.com> > > --- > > arch/x86/crypto/poly1305_glue.c | 20 +++++++------ > > crypto/poly1305_generic.c | 52 ++++++++++++++++----------------- > > include/crypto/poly1305.h | 12 ++++++-- > > 3 files changed, 47 insertions(+), 37 deletions(-) > > > > diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c > > index f012b7e28ad1..88cc01506c84 100644 > > --- a/arch/x86/crypto/poly1305_glue.c > > +++ b/arch/x86/crypto/poly1305_glue.c > > @@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, > > if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { > > if (unlikely(!sctx->wset)) { > > if (!sctx->uset) { > > - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); > > - poly1305_simd_mult(sctx->u, dctx->r); > > + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); > > + poly1305_simd_mult(sctx->u, dctx->r.r); > > sctx->uset = true; > > } > > memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); > > - poly1305_simd_mult(sctx->u + 5, dctx->r); > > + poly1305_simd_mult(sctx->u + 5, dctx->r.r); > > memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); > > - poly1305_simd_mult(sctx->u + 10, dctx->r); > > + poly1305_simd_mult(sctx->u + 10, dctx->r.r); > > sctx->wset = true; > > } > > blocks = srclen / (POLY1305_BLOCK_SIZE * 4); > > - poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u); > > + poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, > > + sctx->u); > > 
src += POLY1305_BLOCK_SIZE * 4 * blocks; > > srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; > > } > > #endif > > if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { > > if (unlikely(!sctx->uset)) { > > - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); > > - poly1305_simd_mult(sctx->u, dctx->r); > > + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); > > + poly1305_simd_mult(sctx->u, dctx->r.r); > > sctx->uset = true; > > } > > blocks = srclen / (POLY1305_BLOCK_SIZE * 2); > > - poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u); > > + poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, > > + sctx->u); > > src += POLY1305_BLOCK_SIZE * 2 * blocks; > > srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; > > } > > if (srclen >= POLY1305_BLOCK_SIZE) { > > - poly1305_block_sse2(dctx->h, src, dctx->r, 1); > > + poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); > > srclen -= POLY1305_BLOCK_SIZE; > > } > > return srclen; > > diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c > > index 47d3a6b83931..a23173f351b7 100644 > > --- a/crypto/poly1305_generic.c > > +++ b/crypto/poly1305_generic.c > > @@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc) > > { > > struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); > > > > - memset(dctx->h, 0, sizeof(dctx->h)); > > + memset(dctx->h.h, 0, sizeof(dctx->h.h)); > > dctx->buflen = 0; > > dctx->rset = false; > > dctx->sset = false; > > @@ -50,11 +50,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_init); > > static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) > > { > > /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ > > - dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; > > - dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; > > - dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; > > - dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; > > - dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; > > + dctx->r.r[0] = (get_unaligned_le32(key + 0) >> 0) & 
0x3ffffff; > > + dctx->r.r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; > > + dctx->r.r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; > > + dctx->r.r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; > > + dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; > > } > > > > static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) > > @@ -107,22 +107,22 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, > > srclen = datalen; > > } > > > > - r0 = dctx->r[0]; > > - r1 = dctx->r[1]; > > - r2 = dctx->r[2]; > > - r3 = dctx->r[3]; > > - r4 = dctx->r[4]; > > + r0 = dctx->r.r[0]; > > + r1 = dctx->r.r[1]; > > + r2 = dctx->r.r[2]; > > + r3 = dctx->r.r[3]; > > + r4 = dctx->r.r[4]; > > > > s1 = r1 * 5; > > s2 = r2 * 5; > > s3 = r3 * 5; > > s4 = r4 * 5; > > > > - h0 = dctx->h[0]; > > - h1 = dctx->h[1]; > > - h2 = dctx->h[2]; > > - h3 = dctx->h[3]; > > - h4 = dctx->h[4]; > > + h0 = dctx->h.h[0]; > > + h1 = dctx->h.h[1]; > > + h2 = dctx->h.h[2]; > > + h3 = dctx->h.h[3]; > > + h4 = dctx->h.h[4]; > > > > while (likely(srclen >= POLY1305_BLOCK_SIZE)) { > > > > @@ -157,11 +157,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, > > srclen -= POLY1305_BLOCK_SIZE; > > } > > > > - dctx->h[0] = h0; > > - dctx->h[1] = h1; > > - dctx->h[2] = h2; > > - dctx->h[3] = h3; > > - dctx->h[4] = h4; > > + dctx->h.h[0] = h0; > > + dctx->h.h[1] = h1; > > + dctx->h.h[2] = h2; > > + dctx->h.h[3] = h3; > > + dctx->h.h[4] = h4; > > > > return srclen; > > } > > @@ -220,11 +220,11 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) > > } > > > > /* fully carry h */ > > - h0 = dctx->h[0]; > > - h1 = dctx->h[1]; > > - h2 = dctx->h[2]; > > - h3 = dctx->h[3]; > > - h4 = dctx->h[4]; > > + h0 = dctx->h.h[0]; > > + h1 = dctx->h.h[1]; > > + h2 = dctx->h.h[2]; > > + h3 = dctx->h.h[3]; > > + h4 = dctx->h.h[4]; > > > > h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; > > h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; > > diff --git 
a/include/crypto/poly1305.h b/include/crypto/poly1305.h > > index f718a19da82f..493244c46664 100644 > > --- a/include/crypto/poly1305.h > > +++ b/include/crypto/poly1305.h > > @@ -13,13 +13,21 @@ > > #define POLY1305_KEY_SIZE 32 > > #define POLY1305_DIGEST_SIZE 16 > > > > +struct poly1305_key { > > + u32 r[5]; /* key, base 2^26 */ > > +}; > > + > > +struct poly1305_state { > > + u32 h[5]; /* accumulator, base 2^26 */ > > +}; > > + > > Sorry to bikeshed, but wouldn't it make more sense to have single definition > > struct poly1305_val { > u32 v[5]; /* base 2^26 */ > }; > > and have 'key' and 'accum[ulator]' fields of type struct poly1305_val > in the struct below? > I prefer separate types so that the type system enforces that a key is never accidentally used as an accumulator, and vice versa. Then the poly1305_core_* functions will be harder to misuse, and the Poly1305 MAC implementations harder to get wrong. The key also has certain bits clear whereas the accumulator does not. In the future, the Poly1305 C implementation might use base 2^32 and take advantage of this. In that case, the two inputs to each multiplication won't be interchangeable, so using the same type for both would be extra confusing. Having a poly1305_val nested inside poly1305_key and poly1305_state would work, but seemed excessive. > > struct poly1305_desc_ctx { > > /* key */ > > - u32 r[5]; > > + struct poly1305_key r; > > /* finalize key */ > > u32 s[4]; > > /* accumulator */ > > - u32 h[5]; > > + struct poly1305_state h; > > /* partial buffer */ > > u8 buf[POLY1305_BLOCK_SIZE]; > > /* bytes used in partial buffer */ > > -- Thanks, - Eric
Hi Eric: On Mon, Nov 12, 2018 at 10:58:17AM -0800, Eric Biggers wrote: > > I prefer separate types so that the type system enforces that a key is never > accidentally used as an accumulator, and vice versa. Then the poly1305_core_* > functions will be harder to misuse, and the Poly1305 MAC implementations harder > to get wrong. > > The key also has certain bits clear whereas the accumulator does not. In the > future, the Poly1305 C implementation might use base 2^32 and take advantage of > this. In that case, the two inputs to each multiplication won't be > interchangeable, so using the same type for both would be extra confusing. > > Having a poly1305_val nested inside poly1305_key and poly1305_state would work, > but seemed excessive. So it looks like there are no more unresolved issues with this patch series. Please let me know when you want it to go in. Thanks,
Hi Herbert, On Fri, Nov 16, 2018 at 02:02:27PM +0800, Herbert Xu wrote: > Hi Eric: > > On Mon, Nov 12, 2018 at 10:58:17AM -0800, Eric Biggers wrote: > > > > I prefer separate types so that the type system enforces that a key is never > > accidentally used as an accumulator, and vice versa. Then the poly1305_core_* > > functions will be harder to misuse, and the Poly1305 MAC implementations harder > > to get wrong. > > > > The key also has certain bits clear whereas the accumulator does not. In the > > future, the Poly1305 C implementation might use base 2^32 and take advantage of > > this. In that case, the two inputs to each multiplication won't be > > interchangeable, so using the same type for both would be extra confusing. > > > > Having a poly1305_val nested inside poly1305_key and poly1305_state would work, > > but seemed excessive. > > So it looks like there are no more unresolved issues with this > patch series. Please let me know when you want it to go in. > I believe it's ready to go in. I'll need to rebase it onto cryptodev, to resolve some small conflicts with the recent ChaCha20-related changes. Also I'll probably omit the fscrypt patch (patch 15/15) so that that patch can be taken through the fscrypt tree instead. Do you prefer that this be merged before or after Zinc? It seems it may still be a while before the community is satisfied with Zinc (and Wireguard which is in the same patchset), and I don't want this blocked unnecessarily... So on my part I'd prefer to just have this merged as-is. Of course, it's always possible to change the xchacha12 and xchacha20 implementations later, whether that's to use "Zinc" or otherwise. And NHPoly1305 and Adiantum itself will be the same either way, except that the Poly1305 helpers may change slightly. What do you think? Thanks! - Eric
On Fri, 16 Nov 2018 at 16:17, Eric Biggers <ebiggers@kernel.org> wrote: > > Hi Herbert, > > On Fri, Nov 16, 2018 at 02:02:27PM +0800, Herbert Xu wrote: > > Hi Eric: > > > > On Mon, Nov 12, 2018 at 10:58:17AM -0800, Eric Biggers wrote: > > > > > > I prefer separate types so that the type system enforces that a key is never > > > accidentally used as an accumulator, and vice versa. Then the poly1305_core_* > > > functions will be harder to misuse, and the Poly1305 MAC implementations harder > > > to get wrong. > > > > > > The key also has certain bits clear whereas the accumulator does not. In the > > > future, the Poly1305 C implementation might use base 2^32 and take advantage of > > > this. In that case, the two inputs to each multiplication won't be > > > interchangeable, so using the same type for both would be extra confusing. > > > > > > Having a poly1305_val nested inside poly1305_key and poly1305_state would work, > > > but seemed excessive. > > > > So it looks like there are no more unresolved issues with this > > patch series. Please let me know when you want it to go in. > > > > I believe it's ready to go in. I'll need to rebase it onto cryptodev, to > resolve some small conflicts with the recent ChaCha20-related changes. Also > I'll probably omit the fscrypt patch (patch 15/15) so that that patch can be > taken through the fscrypt tree instead. > > Do you prefer that this be merged before or after Zinc? It seems it may still > be a while before the community is satisfied with Zinc (and Wireguard which is > in the same patchset), and I don't want this blocked unnecessarily... So on my > part I'd prefer to just have this merged as-is. > > Of course, it's always possible to change the xchacha12 and xchacha20 > implementations later, whether that's to use "Zinc" or otherwise. And > NHPoly1305 and Adiantum itself will be the same either way, except that the > Poly1305 helpers may change slightly. > > What do you think? > I think this is ready to go in. 
Most of Zinc itself will not be blocked by this, only the changes to the crypto API ChaCha20 implementation will have to wait until the Zinc version gains support for the reduced round variant, but that will not block WireGuard.
Hi Eric, On Sat, Nov 17, 2018 at 1:17 AM Eric Biggers <ebiggers@kernel.org> wrote: > Do you prefer that this be merged before or after Zinc? It seems it may still > be a while before the community is satisfied with Zinc (and Wireguard which is > in the same patchset), and I don't want this blocked unnecessarily... So on my > part I'd prefer to just have this merged as-is. Personally I'd prefer this be merged after Zinc, since there's work to be done on adjusting the 20->12 in chacha20. That's not really much of a reason though. But maybe we can just sidestep the ordering concern altogether: What I suspect we should do is make the initial Zinc merge _without_ those top two patches that replace the crypto api's implementations with Zinc, and defer those until after the initial merges. Those commits are already written -- so there's no chance it won't happen due to laziness or something -- and I think the general merge will go a bit more smoothly if we wait until after. (Somebody suggested we do it this way at Plumbers, and was actually a bit surprised I had already ported the crypto API stuff to Zinc.) This way, Adiantum and Zinc aren't potentially co-dependent in their initial merges and we can work on the details carefully with each other after both have landed. I figure this might make things a little bit less stressful for both of us. How would you feel about doing it that way? Jason
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index f012b7e28ad1..88cc01506c84 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { if (unlikely(!sctx->wset)) { if (!sctx->uset) { - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r); + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); + poly1305_simd_mult(sctx->u, dctx->r.r); sctx->uset = true; } memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 5, dctx->r); + poly1305_simd_mult(sctx->u + 5, dctx->r.r); memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 10, dctx->r); + poly1305_simd_mult(sctx->u + 10, dctx->r.r); sctx->wset = true; } blocks = srclen / (POLY1305_BLOCK_SIZE * 4); - poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u); + poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, + sctx->u); src += POLY1305_BLOCK_SIZE * 4 * blocks; srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; } #endif if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { if (unlikely(!sctx->uset)) { - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r); + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); + poly1305_simd_mult(sctx->u, dctx->r.r); sctx->uset = true; } blocks = srclen / (POLY1305_BLOCK_SIZE * 2); - poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u); + poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, + sctx->u); src += POLY1305_BLOCK_SIZE * 2 * blocks; srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; } if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_block_sse2(dctx->h, src, dctx->r, 1); + poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); srclen -= POLY1305_BLOCK_SIZE; } return srclen; diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index 47d3a6b83931..a23173f351b7 
100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - memset(dctx->h, 0, sizeof(dctx->h)); + memset(dctx->h.h, 0, sizeof(dctx->h.h)); dctx->buflen = 0; dctx->rset = false; dctx->sset = false; @@ -50,11 +50,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_init); static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; - dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; - dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; - dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; - dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; + dctx->r.r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; + dctx->r.r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; + dctx->r.r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; + dctx->r.r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; + dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; } static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) @@ -107,22 +107,22 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, srclen = datalen; } - r0 = dctx->r[0]; - r1 = dctx->r[1]; - r2 = dctx->r[2]; - r3 = dctx->r[3]; - r4 = dctx->r[4]; + r0 = dctx->r.r[0]; + r1 = dctx->r.r[1]; + r2 = dctx->r.r[2]; + r3 = dctx->r.r[3]; + r4 = dctx->r.r[4]; s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5; - h0 = dctx->h[0]; - h1 = dctx->h[1]; - h2 = dctx->h[2]; - h3 = dctx->h[3]; - h4 = dctx->h[4]; + h0 = dctx->h.h[0]; + h1 = dctx->h.h[1]; + h2 = dctx->h.h[2]; + h3 = dctx->h.h[3]; + h4 = dctx->h.h[4]; while (likely(srclen >= POLY1305_BLOCK_SIZE)) { @@ -157,11 +157,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, srclen -= POLY1305_BLOCK_SIZE; } - dctx->h[0] = h0; - 
dctx->h[1] = h1; - dctx->h[2] = h2; - dctx->h[3] = h3; - dctx->h[4] = h4; + dctx->h.h[0] = h0; + dctx->h.h[1] = h1; + dctx->h.h[2] = h2; + dctx->h.h[3] = h3; + dctx->h.h[4] = h4; return srclen; } @@ -220,11 +220,11 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) } /* fully carry h */ - h0 = dctx->h[0]; - h1 = dctx->h[1]; - h2 = dctx->h[2]; - h3 = dctx->h[3]; - h4 = dctx->h[4]; + h0 = dctx->h.h[0]; + h1 = dctx->h.h[1]; + h2 = dctx->h.h[2]; + h3 = dctx->h.h[3]; + h4 = dctx->h.h[4]; h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index f718a19da82f..493244c46664 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -13,13 +13,21 @@ #define POLY1305_KEY_SIZE 32 #define POLY1305_DIGEST_SIZE 16 +struct poly1305_key { + u32 r[5]; /* key, base 2^26 */ +}; + +struct poly1305_state { + u32 h[5]; /* accumulator, base 2^26 */ +}; + struct poly1305_desc_ctx { /* key */ - u32 r[5]; + struct poly1305_key r; /* finalize key */ u32 s[4]; /* accumulator */ - u32 h[5]; + struct poly1305_state h; /* partial buffer */ u8 buf[POLY1305_BLOCK_SIZE]; /* bytes used in partial buffer */