From patchwork Mon Jan 22 23:04:00 2018
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Junaid Shahid <junaids@google.com>
X-Patchwork-Id: 10179429
X-Patchwork-Delegate: herbert@gondor.apana.org.au
Return-Path: <linux-crypto-owner@kernel.org>
Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org
	[172.30.200.125])
	by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id
	58B6B60224 for <patchwork-linux-crypto@patchwork.kernel.org>;
	Mon, 22 Jan 2018 23:04:11 +0000 (UTC)
Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1])
	by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 495E628518
	for <patchwork-linux-crypto@patchwork.kernel.org>;
	Mon, 22 Jan 2018 23:04:11 +0000 (UTC)
Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486)
	id 3D3D4285B9; Mon, 22 Jan 2018 23:04:11 +0000 (UTC)
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on
	pdx-wl-mail.web.codeaurora.org
X-Spam-Level: 
X-Spam-Status: No, score=-7.0 required=2.0 tests=BAYES_00,DKIM_SIGNED,
	DKIM_VALID, DKIM_VALID_AU,
	RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 8751328518
	for <patchwork-linux-crypto@patchwork.kernel.org>;
	Mon, 22 Jan 2018 23:04:10 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1751059AbeAVXEJ (ORCPT
	<rfc822;patchwork-linux-crypto@patchwork.kernel.org>);
	Mon, 22 Jan 2018 18:04:09 -0500
Received: from mail-pg0-f67.google.com ([74.125.83.67]:38435 "EHLO
	mail-pg0-f67.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1751030AbeAVXEH (ORCPT
	<rfc822;linux-crypto@vger.kernel.org>);
	Mon, 22 Jan 2018 18:04:07 -0500
Received: by mail-pg0-f67.google.com with SMTP id y27so8264251pgc.5
	for <linux-crypto@vger.kernel.org>;
	Mon, 22 Jan 2018 15:04:07 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=google.com; s=20161025;
	h=from:to:cc:subject:date:message-id:in-reply-to:references;
	bh=+be8+DI3QRXMJqYBziRWWBZMY2UwiwlJr4as/dLcxJw=;
	b=PyCu4m4jKGbn6CtI8B/qJAQh9goRWlXrm709d/zWK0CSv3Qvswywu5kOenmurDLIZT
	1TEHRQypSAUy6nYa3w5l4B+ZytpfFqlSvj5Kpuci+WVO7bBF8LK2zCyZOWyEJvAfSgrl
	xsvpTgOBzYuL7KLGb6RHivw0AR+AQPJKjfYw7GHnMROFQVF+gUnaFqfHFGo44QIIN6Tg
	dbpC2aIR5jdxisn8+Due+/z21eCJrq7rp5/Yc/Kv7uEcCiqmi9tgg2OIfFknhTkqfMkF
	17aM8isN5gwVdR3Vzri051sldyVqq9cDNGC4VTIuzLVk/qmTg2QadjAxI2f6MO/GForp
	bgmQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=1e100.net; s=20161025;
	h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to
	:references;
	bh=+be8+DI3QRXMJqYBziRWWBZMY2UwiwlJr4as/dLcxJw=;
	b=Vun6XxIf1fT7PKTYDl0xMae6AcTOvrmsF09AtmDW8rPg5MRwYPLPfi552dQycZLGL7
	U84EShgKsuo2+GBX4ud0sgZr43YhAyJpsqHNPK2b7W99bUs8hEtjLqtOVVE5xZ4b+eUQ
	e3wDUoyM167qjlcF2KS9AvGfRDTw5WWji/hUBKuJ5rMG1brJx+SYEPoL0MwpTLMnZ2/a
	NrT2znXdUE6tS+DIf4fk4EyiBWE1DbzPZ9n1uYsJ2lCu6M3sOGXOt6kwHJWx/dps58HT
	9RBs5MFWb7pLKeqUWU6iZt92A+6yZTEJZVTRPuPGH0i7PMq15v4WDwOe6764aGYCA+U8
	pgjg==
X-Gm-Message-State: AKwxyteRXCznDPvZ+bkRl3KCtd7N5cqMFT+hGFrLEwjIqh3Nidt9uhW0
	nY4FQEeUMGhikyg+ZOsTUiGdPA==
X-Google-Smtp-Source: 
 AH8x226Y8VKdtdLSqQKMSaTcxiLO3YkeHBbH/XP0UcIhvLUiMRJ04f03WGx816PVGgd+62MPO5bFwA==
X-Received: by 2002:a17:902:d688:: with SMTP id
	v8-v6mr4202733ply.302.1516662246930;
	Mon, 22 Jan 2018 15:04:06 -0800 (PST)
Received: from js-desktop.svl.corp.google.com
	([2620:15c:2cb:1:cdad:b4d5:21d1:e91e])
	by smtp.gmail.com with ESMTPSA id
	c29sm1976297pfd.172.2018.01.22.15.04.06
	(version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);
	Mon, 22 Jan 2018 15:04:06 -0800 (PST)
From: Junaid Shahid <junaids@google.com>
To: herbert@gondor.apana.org.au
Cc: linux-crypto@vger.kernel.org, andreslc@google.com,
	davem@davemloft.net, gthelen@google.com, ebiggers3@gmail.com
Subject: [PATCH 1/4] crypto: aesni - Fix out-of-bounds access of the AAD
	buffer in AVX gcm-aesni
Date: Mon, 22 Jan 2018 15:04:00 -0800
Message-Id: <20180122230403.52572-2-junaids@google.com>
X-Mailer: git-send-email 2.16.0.rc1.238.g530d649a79-goog
In-Reply-To: <20180122230403.52572-1-junaids@google.com>
References: <20180122230403.52572-1-junaids@google.com>
Sender: linux-crypto-owner@vger.kernel.org
Precedence: bulk
List-ID: <linux-crypto.vger.kernel.org>
X-Mailing-List: linux-crypto@vger.kernel.org
X-Virus-Scanned: ClamAV using ClamSMTP

The AVX/AVX2 versions of gcm-aes encryption/decryption functions can
access memory after the end of the AAD buffer if the AAD length is
not a multiple of 4 bytes. It didn't matter as long as the AAD and
data buffers were always contiguous, since the AVX versions are not used
for small data sizes and hence enough data bytes were always present to
cover the over-run. However, now that we have support for non-contiguous
AAD and data buffers, that is no longer the case. This can potentially
result in accessing a page that is not mapped and thus causing the
machine to crash. This patch fixes that by reading the last <16 byte
block of the AAD with a single 8-byte load when the block is at least
8 bytes long, and reading any remaining bytes one byte at a time.

Signed-off-by: Junaid Shahid <junaids@google.com>
---
 arch/x86/crypto/aesni-intel_avx-x86_64.S | 154 +++++++++----------------------
 1 file changed, 42 insertions(+), 112 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index faecb1518bf8..97029059dc1a 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -106,14 +106,6 @@
 ##
 ##        AAD Format with 64-bit Extended Sequence Number
 ##
-##
-## aadLen:
-##       from the definition of the spec, aadLen can only be 8 or 12 bytes.
-##	 The code additionally supports aadLen of length 16 bytes.
-##
-## TLen:
-##       from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-##
 ## poly = x^128 + x^127 + x^126 + x^121 + 1
 ## throughout the code, one tab and two tab indentations are used. one tab is
 ## for GHASH part, two tabs is for AES part.
@@ -155,30 +147,6 @@ SHIFT_MASK:      .octa     0x0f0e0d0c0b0a09080706050403020100
 ALL_F:           .octa     0xffffffffffffffffffffffffffffffff
                  .octa     0x00000000000000000000000000000000
 
-.section .rodata
-.align 16
-.type aad_shift_arr, @object
-.size aad_shift_arr, 272
-aad_shift_arr:
-        .octa     0xffffffffffffffffffffffffffffffff
-        .octa     0xffffffffffffffffffffffffffffff0C
-        .octa     0xffffffffffffffffffffffffffff0D0C
-        .octa     0xffffffffffffffffffffffffff0E0D0C
-        .octa     0xffffffffffffffffffffffff0F0E0D0C
-        .octa     0xffffffffffffffffffffff0C0B0A0908
-        .octa     0xffffffffffffffffffff0D0C0B0A0908
-        .octa     0xffffffffffffffffff0E0D0C0B0A0908
-        .octa     0xffffffffffffffff0F0E0D0C0B0A0908
-        .octa     0xffffffffffffff0C0B0A090807060504
-        .octa     0xffffffffffff0D0C0B0A090807060504
-        .octa     0xffffffffff0E0D0C0B0A090807060504
-        .octa     0xffffffff0F0E0D0C0B0A090807060504
-        .octa     0xffffff0C0B0A09080706050403020100
-        .octa     0xffff0D0C0B0A09080706050403020100
-        .octa     0xff0E0D0C0B0A09080706050403020100
-        .octa     0x0F0E0D0C0B0A09080706050403020100
-
-
 .text
 
 
@@ -280,6 +248,36 @@ VARIABLE_OFFSET = 16*8
                 vaesenclast 16*10(arg1), \XMM0, \XMM0
 .endm
 
+# Reads DLEN bytes (0 < DLEN < 16) starting at DPTR into the low bytes of
+# XMMDst and zeroes the remaining high bytes; reads stay below DPTR + DLEN.
+# Clobbers %rax, DLEN (left at 0), XMM1 (only when DLEN > 8) and the flags
+.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
+        cmp $8, \DLEN
+        jl _read_lt8_\@                 # fewer than 8 bytes: use byte loads only
+        movq (\DPTR), \XMMDst           # low qword = first 8 bytes, high qword = 0
+        sub $8, \DLEN                   # DLEN = bytes left beyond the first 8
+        jz _done_read_partial_block_\@  # exactly 8 bytes: nothing more to read
+	xor %eax, %eax
+_read_next_byte_\@:
+        shl $8, %rax                    # make room for the next lower-addressed byte
+        mov 7(\DPTR, \DLEN, 1), %al     # byte at offset 8 + (DLEN - 1): last byte first
+        dec \DLEN
+        jnz _read_next_byte_\@
+        movq %rax, \XMM1
+	pslldq $8, \XMM1                # shift the tail bytes up into the high qword
+        por \XMM1, \XMMDst
+	jmp _done_read_partial_block_\@
+_read_lt8_\@:
+	xor %eax, %eax
+_read_next_byte_lt8_\@:
+        shl $8, %rax
+        mov -1(\DPTR, \DLEN, 1), %al    # byte at offset DLEN - 1: last byte first
+        dec \DLEN
+        jnz _read_next_byte_lt8_\@
+        movq %rax, \XMMDst              # low qword = the bytes read, high qword = 0
+_done_read_partial_block_\@:
+.endm
+
 #ifdef CONFIG_AS_AVX
 ###############################################################################
 # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
@@ -400,63 +398,29 @@ VARIABLE_OFFSET = 16*8
 	setreg
 
 	mov     arg6, %r10                      # r10 = AAD
-	mov     arg7, %r12                      # r12 = aadLen
-
-
-	mov     %r12, %r11
+	mov     arg7, %r11                      # r11 = aadLen
 
 	vpxor   reg_j, reg_j, reg_j
 	vpxor   reg_i, reg_i, reg_i
 	cmp     $16, %r11
-	jl      _get_AAD_rest8\@
+	jl      _get_AAD_rest\@
 _get_AAD_blocks\@:
 	vmovdqu (%r10), reg_i
 	vpshufb SHUF_MASK(%rip), reg_i, reg_i
 	vpxor   reg_i, reg_j, reg_j
 	GHASH_MUL_AVX       reg_j, \T2, \T1, \T3, \T4, \T5, \T6
 	add     $16, %r10
-	sub     $16, %r12
 	sub     $16, %r11
 	cmp     $16, %r11
 	jge     _get_AAD_blocks\@
 	vmovdqu reg_j, reg_i
+
+	/* read the last <16B of AAD. */
+_get_AAD_rest\@:
 	cmp     $0, %r11
 	je      _get_AAD_done\@
 
-	vpxor   reg_i, reg_i, reg_i
-
-	/* read the last <16B of AAD. since we have at least 4B of
-	data right after the AAD (the ICV, and maybe some CT), we can
-	read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\@:
-	cmp     $4, %r11
-	jle     _get_AAD_rest4\@
-	movq    (%r10), \T1
-	add     $8, %r10
-	sub     $8, %r11
-	vpslldq $8, \T1, \T1
-	vpsrldq $8, reg_i, reg_i
-	vpxor   \T1, reg_i, reg_i
-	jmp     _get_AAD_rest8\@
-_get_AAD_rest4\@:
-	cmp     $0, %r11
-	jle      _get_AAD_rest0\@
-	mov     (%r10), %eax
-	movq    %rax, \T1
-	add     $4, %r10
-	sub     $4, %r11
-	vpslldq $12, \T1, \T1
-	vpsrldq $4, reg_i, reg_i
-	vpxor   \T1, reg_i, reg_i
-_get_AAD_rest0\@:
-	/* finalize: shift out the extra bytes we read, and align
-	left. since pslldq can only shift by an immediate, we use
-	vpshufb and an array of shuffle masks */
-	movq    %r12, %r11
-	salq    $4, %r11
-	movdqu  aad_shift_arr(%r11), \T1
-	vpshufb \T1, reg_i, reg_i
-_get_AAD_rest_final\@:
+	READ_PARTIAL_BLOCK %r10, %r11, \T1, reg_i
 	vpshufb SHUF_MASK(%rip), reg_i, reg_i
 	vpxor   reg_j, reg_i, reg_i
 	GHASH_MUL_AVX       reg_i, \T2, \T1, \T3, \T4, \T5, \T6
@@ -1706,64 +1670,30 @@ ENDPROC(aesni_gcm_dec_avx_gen2)
 	setreg
 
 	mov     arg6, %r10                       # r10 = AAD
-	mov     arg7, %r12                       # r12 = aadLen
-
-
-	mov     %r12, %r11
+	mov     arg7, %r11                       # r11 = aadLen
 
 	vpxor   reg_j, reg_j, reg_j
 	vpxor   reg_i, reg_i, reg_i
 
 	cmp     $16, %r11
-	jl      _get_AAD_rest8\@
+	jl      _get_AAD_rest\@
 _get_AAD_blocks\@:
 	vmovdqu (%r10), reg_i
 	vpshufb SHUF_MASK(%rip), reg_i, reg_i
 	vpxor   reg_i, reg_j, reg_j
 	GHASH_MUL_AVX2      reg_j, \T2, \T1, \T3, \T4, \T5, \T6
 	add     $16, %r10
-	sub     $16, %r12
 	sub     $16, %r11
 	cmp     $16, %r11
 	jge     _get_AAD_blocks\@
 	vmovdqu reg_j, reg_i
+
+	/* read the last <16B of AAD. */
+_get_AAD_rest\@:
 	cmp     $0, %r11
 	je      _get_AAD_done\@
 
-	vpxor   reg_i, reg_i, reg_i
-
-	/* read the last <16B of AAD. since we have at least 4B of
-	data right after the AAD (the ICV, and maybe some CT), we can
-	read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\@:
-	cmp     $4, %r11
-	jle     _get_AAD_rest4\@
-	movq    (%r10), \T1
-	add     $8, %r10
-	sub     $8, %r11
-	vpslldq $8, \T1, \T1
-	vpsrldq $8, reg_i, reg_i
-	vpxor   \T1, reg_i, reg_i
-	jmp     _get_AAD_rest8\@
-_get_AAD_rest4\@:
-	cmp     $0, %r11
-	jle     _get_AAD_rest0\@
-	mov     (%r10), %eax
-	movq    %rax, \T1
-	add     $4, %r10
-	sub     $4, %r11
-	vpslldq $12, \T1, \T1
-	vpsrldq $4, reg_i, reg_i
-	vpxor   \T1, reg_i, reg_i
-_get_AAD_rest0\@:
-	/* finalize: shift out the extra bytes we read, and align
-	left. since pslldq can only shift by an immediate, we use
-	vpshufb and an array of shuffle masks */
-	movq    %r12, %r11
-	salq    $4, %r11
-	movdqu  aad_shift_arr(%r11), \T1
-	vpshufb \T1, reg_i, reg_i
-_get_AAD_rest_final\@:
+	READ_PARTIAL_BLOCK %r10, %r11, \T1, reg_i
 	vpshufb SHUF_MASK(%rip), reg_i, reg_i
 	vpxor   reg_j, reg_i, reg_i
 	GHASH_MUL_AVX2      reg_i, \T2, \T1, \T3, \T4, \T5, \T6