diff mbox series

[1/2] crypto: talitos - Work around SEC6 ERRATA (AES-CTR mode data size error)

Message ID 4b7a870573f485b9fea496b13c9b02d86dd97314.1611169001.git.christophe.leroy@csgroup.eu (mailing list archive)
State Accepted
Delegated to: Herbert Xu
Headers show
Series [1/2] crypto: talitos - Work around SEC6 ERRATA (AES-CTR mode data size error) | expand

Commit Message

Christophe Leroy Jan. 20, 2021, 6:57 p.m. UTC
Talitos Security Engine AESU considers any input
data size that is not a multiple of 16 bytes to be an error.
This is not a problem in general, except for Counter mode
that is a stream cipher and can have an input of any size.

Test Manager for ctr(aes) fails on 4th test vector which has
a length of 499 while all previous vectors which have a 16 bytes
multiple length succeed.

As suggested by Freescale, round up the input data length to the
nearest 16 bytes.

Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 drivers/crypto/talitos.c | 28 ++++++++++++++++------------
 drivers/crypto/talitos.h |  1 +
 2 files changed, 17 insertions(+), 12 deletions(-)

Comments

Ard Biesheuvel Jan. 20, 2021, 10:23 p.m. UTC | #1
On Wed, 20 Jan 2021 at 19:59, Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
>
> Talitos Security Engine AESU considers any input
> data size that is not a multiple of 16 bytes to be an error.
> This is not a problem in general, except for Counter mode
> that is a stream cipher and can have an input of any size.
>
> Test Manager for ctr(aes) fails on 4th test vector which has
> a length of 499 while all previous vectors which have a 16 bytes
> multiple length succeed.
>
> As suggested by Freescale, round up the input data length to the
> nearest 16 bytes.
>
> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>

Doesn't this cause the hardware to write outside the given buffer?

> ---
>  drivers/crypto/talitos.c | 28 ++++++++++++++++------------
>  drivers/crypto/talitos.h |  1 +
>  2 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
> index 4fd85f31630a..b656983c1ef4 100644
> --- a/drivers/crypto/talitos.c
> +++ b/drivers/crypto/talitos.c
> @@ -1093,11 +1093,12 @@ static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
>   */
>  static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count,
>                                  unsigned int offset, int datalen, int elen,
> -                                struct talitos_ptr *link_tbl_ptr)
> +                                struct talitos_ptr *link_tbl_ptr, int align)
>  {
>         int n_sg = elen ? sg_count + 1 : sg_count;
>         int count = 0;
>         int cryptlen = datalen + elen;
> +       int padding = ALIGN(cryptlen, align) - cryptlen;
>
>         while (cryptlen && sg && n_sg--) {
>                 unsigned int len = sg_dma_len(sg);
> @@ -1121,7 +1122,7 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count,
>                         offset += datalen;
>                 }
>                 to_talitos_ptr(link_tbl_ptr + count,
> -                              sg_dma_address(sg) + offset, len, 0);
> +                              sg_dma_address(sg) + offset, sg_next(sg) ? len : len + padding, 0);
>                 to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0);
>                 count++;
>                 cryptlen -= len;
> @@ -1144,10 +1145,11 @@ static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src,
>                               unsigned int len, struct talitos_edesc *edesc,
>                               struct talitos_ptr *ptr, int sg_count,
>                               unsigned int offset, int tbl_off, int elen,
> -                             bool force)
> +                             bool force, int align)
>  {
>         struct talitos_private *priv = dev_get_drvdata(dev);
>         bool is_sec1 = has_ftr_sec1(priv);
> +       int aligned_len = ALIGN(len, align);
>
>         if (!src) {
>                 to_talitos_ptr(ptr, 0, 0, is_sec1);
> @@ -1155,22 +1157,22 @@ static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src,
>         }
>         to_talitos_ptr_ext_set(ptr, elen, is_sec1);
>         if (sg_count == 1 && !force) {
> -               to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
> +               to_talitos_ptr(ptr, sg_dma_address(src) + offset, aligned_len, is_sec1);
>                 return sg_count;
>         }
>         if (is_sec1) {
> -               to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, len, is_sec1);
> +               to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, aligned_len, is_sec1);
>                 return sg_count;
>         }
>         sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, elen,
> -                                        &edesc->link_tbl[tbl_off]);
> +                                        &edesc->link_tbl[tbl_off], align);
>         if (sg_count == 1 && !force) {
>                 /* Only one segment now, so no link tbl needed*/
>                 copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1);
>                 return sg_count;
>         }
>         to_talitos_ptr(ptr, edesc->dma_link_tbl +
> -                           tbl_off * sizeof(struct talitos_ptr), len, is_sec1);
> +                           tbl_off * sizeof(struct talitos_ptr), aligned_len, is_sec1);
>         to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1);
>
>         return sg_count;
> @@ -1182,7 +1184,7 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src,
>                           unsigned int offset, int tbl_off)
>  {
>         return talitos_sg_map_ext(dev, src, len, edesc, ptr, sg_count, offset,
> -                                 tbl_off, 0, false);
> +                                 tbl_off, 0, false, 1);
>  }
>
>  /*
> @@ -1251,7 +1253,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
>
>         ret = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[4],
>                                  sg_count, areq->assoclen, tbl_off, elen,
> -                                false);
> +                                false, 1);
>
>         if (ret > 1) {
>                 tbl_off += ret;
> @@ -1271,7 +1273,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
>                 elen = 0;
>         ret = talitos_sg_map_ext(dev, areq->dst, cryptlen, edesc, &desc->ptr[5],
>                                  sg_count, areq->assoclen, tbl_off, elen,
> -                                is_ipsec_esp && !encrypt);
> +                                is_ipsec_esp && !encrypt, 1);
>         tbl_off += ret;
>
>         if (!encrypt && is_ipsec_esp) {
> @@ -1577,6 +1579,8 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
>         bool sync_needed = false;
>         struct talitos_private *priv = dev_get_drvdata(dev);
>         bool is_sec1 = has_ftr_sec1(priv);
> +       bool is_ctr = (desc->hdr & DESC_HDR_SEL0_MASK) == DESC_HDR_SEL0_AESU &&
> +                     (desc->hdr & DESC_HDR_MODE0_AESU_MASK) == DESC_HDR_MODE0_AESU_CTR;
>
>         /* first DWORD empty */
>
> @@ -1597,8 +1601,8 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
>         /*
>          * cipher in
>          */
> -       sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc,
> -                                 &desc->ptr[3], sg_count, 0, 0);
> +       sg_count = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[3],
> +                                     sg_count, 0, 0, 0, false, is_ctr ? 16 : 1);
>         if (sg_count > 1)
>                 sync_needed = true;
>
> diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
> index 1469b956948a..32825119e880 100644
> --- a/drivers/crypto/talitos.h
> +++ b/drivers/crypto/talitos.h
> @@ -344,6 +344,7 @@ static inline bool has_ftr_sec1(struct talitos_private *priv)
>
>  /* primary execution unit mode (MODE0) and derivatives */
>  #define        DESC_HDR_MODE0_ENCRYPT          cpu_to_be32(0x00100000)
> +#define        DESC_HDR_MODE0_AESU_MASK        cpu_to_be32(0x00600000)
>  #define        DESC_HDR_MODE0_AESU_CBC         cpu_to_be32(0x00200000)
>  #define        DESC_HDR_MODE0_AESU_CTR         cpu_to_be32(0x00600000)
>  #define        DESC_HDR_MODE0_DEU_CBC          cpu_to_be32(0x00400000)
> --
> 2.25.0
>
Christophe Leroy Jan. 21, 2021, 5:35 a.m. UTC | #2
Le 20/01/2021 à 23:23, Ard Biesheuvel a écrit :
> On Wed, 20 Jan 2021 at 19:59, Christophe Leroy
> <christophe.leroy@csgroup.eu> wrote:
>>
>> Talitos Security Engine AESU considers any input
>> data size that is not a multiple of 16 bytes to be an error.
>> This is not a problem in general, except for Counter mode
>> that is a stream cipher and can have an input of any size.
>>
>> Test Manager for ctr(aes) fails on 4th test vector which has
>> a length of 499 while all previous vectors which have a 16 bytes
>> multiple length succeed.
>>
>> As suggested by Freescale, round up the input data length to the
>> nearest 16 bytes.
>>
>> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> 
> Doesn't this cause the hardware to write outside the given buffer?


Only the input length is modified. Not the output length.

The ERRATA says:

The input data length (in the descriptor) can be rounded up to the nearest 16B. Set the
data-in length (in the descriptor) to include X bytes of data beyond the payload. Set the
data-out length to only output the relevant payload (don't need to output the padding).
SEC reads from memory are not destructive, so the extra bytes included in the AES-CTR
operation can be whatever bytes are contiguously trailing the payload.
Ard Biesheuvel Jan. 21, 2021, 7:31 a.m. UTC | #3
On Thu, 21 Jan 2021 at 06:35, Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
>
>
>
> Le 20/01/2021 à 23:23, Ard Biesheuvel a écrit :
> > On Wed, 20 Jan 2021 at 19:59, Christophe Leroy
> > <christophe.leroy@csgroup.eu> wrote:
> >>
> >> Talitos Security Engine AESU considers any input
> >> data size that is not a multiple of 16 bytes to be an error.
> >> This is not a problem in general, except for Counter mode
> >> that is a stream cipher and can have an input of any size.
> >>
> >> Test Manager for ctr(aes) fails on 4th test vector which has
> >> a length of 499 while all previous vectors which have a 16 bytes
> >> multiple length succeed.
> >>
> >> As suggested by Freescale, round up the input data length to the
> >> nearest 16 bytes.
> >>
> >> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
> >> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> >
> > Doesn't this cause the hardware to write outside the given buffer?
>
>
> Only the input length is modified. Not the output length.
>
> The ERRATA says:
>
> The input data length (in the descriptor) can be rounded up to the nearest 16B. Set the
> data-in length (in the descriptor) to include X bytes of data beyond the payload. Set the
> data-out length to only output the relevant payload (don't need to output the padding).
> SEC reads from memory are not destructive, so the extra bytes included in the AES-CTR
> operation can be whatever bytes are contiguously trailing the payload.

So what happens if the input is not 16 byte aligned, and rounding it
up causes it to extend across a page boundary into a page that is not
mapped by the IOMMU/SMMU?
Christophe Leroy Jan. 21, 2021, 9:54 a.m. UTC | #4
Le 21/01/2021 à 08:31, Ard Biesheuvel a écrit :
> On Thu, 21 Jan 2021 at 06:35, Christophe Leroy
> <christophe.leroy@csgroup.eu> wrote:
>>
>>
>>
>> Le 20/01/2021 à 23:23, Ard Biesheuvel a écrit :
>>> On Wed, 20 Jan 2021 at 19:59, Christophe Leroy
>>> <christophe.leroy@csgroup.eu> wrote:
>>>>
>>>> Talitos Security Engine AESU considers any input
>>>> data size that is not a multiple of 16 bytes to be an error.
>>>> This is not a problem in general, except for Counter mode
>>>> that is a stream cipher and can have an input of any size.
>>>>
>>>> Test Manager for ctr(aes) fails on 4th test vector which has
>>>> a length of 499 while all previous vectors which have a 16 bytes
>>>> multiple length succeed.
>>>>
>>>> As suggested by Freescale, round up the input data length to the
>>>> nearest 16 bytes.
>>>>
>>>> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
>>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>>
>>> Doesn't this cause the hardware to write outside the given buffer?
>>
>>
>> Only the input length is modified. Not the output length.
>>
>> The ERRATA says:
>>
>> The input data length (in the descriptor) can be rounded up to the nearest 16B. Set the
>> data-in length (in the descriptor) to include X bytes of data beyond the payload. Set the
>> data-out length to only output the relevant payload (don't need to output the padding).
>> SEC reads from memory are not destructive, so the extra bytes included in the AES-CTR
>> operation can be whatever bytes are contiguously trailing the payload.
> 
> So what happens if the input is not 16 byte aligned, and rounding it
> up causes it to extend across a page boundary into a page that is not
> mapped by the IOMMU/SMMU?
> 

What is the IOMMU/SMMU ?

The mpc8xx, mpc82xx and mpc83xx which embed the Talitos Security Engine don't have such thing, the 
security engine uses DMA and has direct access to the memory bus for reading and writing.

Christophe
Ard Biesheuvel Jan. 21, 2021, 10:02 a.m. UTC | #5
On Thu, 21 Jan 2021 at 10:54, Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
>
>
>
> Le 21/01/2021 à 08:31, Ard Biesheuvel a écrit :
> > On Thu, 21 Jan 2021 at 06:35, Christophe Leroy
> > <christophe.leroy@csgroup.eu> wrote:
> >>
> >>
> >>
> >> Le 20/01/2021 à 23:23, Ard Biesheuvel a écrit :
> >>> On Wed, 20 Jan 2021 at 19:59, Christophe Leroy
> >>> <christophe.leroy@csgroup.eu> wrote:
> >>>>
> >>>> Talitos Security Engine AESU considers any input
> >>>> data size that is not a multiple of 16 bytes to be an error.
> >>>> This is not a problem in general, except for Counter mode
> >>>> that is a stream cipher and can have an input of any size.
> >>>>
> >>>> Test Manager for ctr(aes) fails on 4th test vector which has
> >>>> a length of 499 while all previous vectors which have a 16 bytes
> >>>> multiple length succeed.
> >>>>
> >>>> As suggested by Freescale, round up the input data length to the
> >>>> nearest 16 bytes.
> >>>>
> >>>> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
> >>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> >>>
> >>> Doesn't this cause the hardware to write outside the given buffer?
> >>
> >>
> >> Only the input length is modified. Not the output length.
> >>
> >> The ERRATA says:
> >>
> >> The input data length (in the descriptor) can be rounded up to the nearest 16B. Set the
> >> data-in length (in the descriptor) to include X bytes of data beyond the payload. Set the
> >> data-out length to only output the relevant payload (don't need to output the padding).
> >> SEC reads from memory are not destructive, so the extra bytes included in the AES-CTR
> >> operation can be whatever bytes are contiguously trailing the payload.
> >
> > So what happens if the input is not 16 byte aligned, and rounding it
> > up causes it to extend across a page boundary into a page that is not
> > mapped by the IOMMU/SMMU?
> >
>
> What is the IOMMU/SMMU ?
>
> The mpc8xx, mpc82xx and mpc83xx which embed the Talitos Security Engine don't have such thing, the
> security engine uses DMA and has direct access to the memory bus for reading and writing.
>

OK, good. So the only case where this could break is when the DMA
access spills over into a page that does not exist, and I suppose this
could only happen if the transfer involves a buffer located at the
very top of DRAM, right?
Christophe Leroy Jan. 21, 2021, 10:12 a.m. UTC | #6
Le 21/01/2021 à 11:02, Ard Biesheuvel a écrit :
> On Thu, 21 Jan 2021 at 10:54, Christophe Leroy
> <christophe.leroy@csgroup.eu> wrote:
>>
>>
>>
>> Le 21/01/2021 à 08:31, Ard Biesheuvel a écrit :
>>> On Thu, 21 Jan 2021 at 06:35, Christophe Leroy
>>> <christophe.leroy@csgroup.eu> wrote:
>>>>
>>>>
>>>>
>>>> Le 20/01/2021 à 23:23, Ard Biesheuvel a écrit :
>>>>> On Wed, 20 Jan 2021 at 19:59, Christophe Leroy
>>>>> <christophe.leroy@csgroup.eu> wrote:
>>>>>>
>>>>>> Talitos Security Engine AESU considers any input
>>>>>> data size that is not a multiple of 16 bytes to be an error.
>>>>>> This is not a problem in general, except for Counter mode
>>>>>> that is a stream cipher and can have an input of any size.
>>>>>>
>>>>>> Test Manager for ctr(aes) fails on 4th test vector which has
>>>>>> a length of 499 while all previous vectors which have a 16 bytes
>>>>>> multiple length succeed.
>>>>>>
>>>>>> As suggested by Freescale, round up the input data length to the
>>>>>> nearest 16 bytes.
>>>>>>
>>>>>> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
>>>>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>>>>
>>>>> Doesn't this cause the hardware to write outside the given buffer?
>>>>
>>>>
>>>> Only the input length is modified. Not the output length.
>>>>
>>>> The ERRATA says:
>>>>
>>>> The input data length (in the descriptor) can be rounded up to the nearest 16B. Set the
>>>> data-in length (in the descriptor) to include X bytes of data beyond the payload. Set the
>>>> data-out length to only output the relevant payload (don't need to output the padding).
>>>> SEC reads from memory are not destructive, so the extra bytes included in the AES-CTR
>>>> operation can be whatever bytes are contiguously trailing the payload.
>>>
>>> So what happens if the input is not 16 byte aligned, and rounding it
>>> up causes it to extend across a page boundary into a page that is not
>>> mapped by the IOMMU/SMMU?
>>>
>>
>> What is the IOMMU/SMMU ?
>>
>> The mpc8xx, mpc82xx and mpc83xx which embed the Talitos Security Engine don't have such thing, the
>> security engine uses DMA and has direct access to the memory bus for reading and writing.
>>
> 
> OK, good. So the only case where this could break is when the DMA
> access spills over into a page that does not exist, and I suppose this
> could only happen if the transfer involves a buffer located at the
> very top of DRAM, right?
> 

Right.

Christophe
Herbert Xu Jan. 29, 2021, 5:10 a.m. UTC | #7
On Wed, Jan 20, 2021 at 06:57:24PM +0000, Christophe Leroy wrote:
> Talitos Security Engine AESU considers any input
> data size that is not a multiple of 16 bytes to be an error.
> This is not a problem in general, except for Counter mode
> that is a stream cipher and can have an input of any size.
> 
> Test Manager for ctr(aes) fails on 4th test vector which has
> a length of 499 while all previous vectors which have a 16 bytes
> multiple length succeed.
> 
> As suggested by Freescale, round up the input data length to the
> nearest 16 bytes.
> 
> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  drivers/crypto/talitos.c | 28 ++++++++++++++++------------
>  drivers/crypto/talitos.h |  1 +
>  2 files changed, 17 insertions(+), 12 deletions(-)

All applied.  Thanks.
diff mbox series

Patch

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 4fd85f31630a..b656983c1ef4 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1093,11 +1093,12 @@  static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
  */
 static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count,
 				 unsigned int offset, int datalen, int elen,
-				 struct talitos_ptr *link_tbl_ptr)
+				 struct talitos_ptr *link_tbl_ptr, int align)
 {
 	int n_sg = elen ? sg_count + 1 : sg_count;
 	int count = 0;
 	int cryptlen = datalen + elen;
+	int padding = ALIGN(cryptlen, align) - cryptlen;
 
 	while (cryptlen && sg && n_sg--) {
 		unsigned int len = sg_dma_len(sg);
@@ -1121,7 +1122,7 @@  static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count,
 			offset += datalen;
 		}
 		to_talitos_ptr(link_tbl_ptr + count,
-			       sg_dma_address(sg) + offset, len, 0);
+			       sg_dma_address(sg) + offset, sg_next(sg) ? len : len + padding, 0);
 		to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0);
 		count++;
 		cryptlen -= len;
@@ -1144,10 +1145,11 @@  static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src,
 			      unsigned int len, struct talitos_edesc *edesc,
 			      struct talitos_ptr *ptr, int sg_count,
 			      unsigned int offset, int tbl_off, int elen,
-			      bool force)
+			      bool force, int align)
 {
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	bool is_sec1 = has_ftr_sec1(priv);
+	int aligned_len = ALIGN(len, align);
 
 	if (!src) {
 		to_talitos_ptr(ptr, 0, 0, is_sec1);
@@ -1155,22 +1157,22 @@  static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src,
 	}
 	to_talitos_ptr_ext_set(ptr, elen, is_sec1);
 	if (sg_count == 1 && !force) {
-		to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
+		to_talitos_ptr(ptr, sg_dma_address(src) + offset, aligned_len, is_sec1);
 		return sg_count;
 	}
 	if (is_sec1) {
-		to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, len, is_sec1);
+		to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, aligned_len, is_sec1);
 		return sg_count;
 	}
 	sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, elen,
-					 &edesc->link_tbl[tbl_off]);
+					 &edesc->link_tbl[tbl_off], align);
 	if (sg_count == 1 && !force) {
 		/* Only one segment now, so no link tbl needed*/
 		copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1);
 		return sg_count;
 	}
 	to_talitos_ptr(ptr, edesc->dma_link_tbl +
-			    tbl_off * sizeof(struct talitos_ptr), len, is_sec1);
+			    tbl_off * sizeof(struct talitos_ptr), aligned_len, is_sec1);
 	to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1);
 
 	return sg_count;
@@ -1182,7 +1184,7 @@  static int talitos_sg_map(struct device *dev, struct scatterlist *src,
 			  unsigned int offset, int tbl_off)
 {
 	return talitos_sg_map_ext(dev, src, len, edesc, ptr, sg_count, offset,
-				  tbl_off, 0, false);
+				  tbl_off, 0, false, 1);
 }
 
 /*
@@ -1251,7 +1253,7 @@  static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 
 	ret = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[4],
 				 sg_count, areq->assoclen, tbl_off, elen,
-				 false);
+				 false, 1);
 
 	if (ret > 1) {
 		tbl_off += ret;
@@ -1271,7 +1273,7 @@  static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 		elen = 0;
 	ret = talitos_sg_map_ext(dev, areq->dst, cryptlen, edesc, &desc->ptr[5],
 				 sg_count, areq->assoclen, tbl_off, elen,
-				 is_ipsec_esp && !encrypt);
+				 is_ipsec_esp && !encrypt, 1);
 	tbl_off += ret;
 
 	if (!encrypt && is_ipsec_esp) {
@@ -1577,6 +1579,8 @@  static int common_nonsnoop(struct talitos_edesc *edesc,
 	bool sync_needed = false;
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	bool is_sec1 = has_ftr_sec1(priv);
+	bool is_ctr = (desc->hdr & DESC_HDR_SEL0_MASK) == DESC_HDR_SEL0_AESU &&
+		      (desc->hdr & DESC_HDR_MODE0_AESU_MASK) == DESC_HDR_MODE0_AESU_CTR;
 
 	/* first DWORD empty */
 
@@ -1597,8 +1601,8 @@  static int common_nonsnoop(struct talitos_edesc *edesc,
 	/*
 	 * cipher in
 	 */
-	sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc,
-				  &desc->ptr[3], sg_count, 0, 0);
+	sg_count = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[3],
+				      sg_count, 0, 0, 0, false, is_ctr ? 16 : 1);
 	if (sg_count > 1)
 		sync_needed = true;
 
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 1469b956948a..32825119e880 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -344,6 +344,7 @@  static inline bool has_ftr_sec1(struct talitos_private *priv)
 
 /* primary execution unit mode (MODE0) and derivatives */
 #define	DESC_HDR_MODE0_ENCRYPT		cpu_to_be32(0x00100000)
+#define	DESC_HDR_MODE0_AESU_MASK	cpu_to_be32(0x00600000)
 #define	DESC_HDR_MODE0_AESU_CBC		cpu_to_be32(0x00200000)
 #define	DESC_HDR_MODE0_AESU_CTR		cpu_to_be32(0x00600000)
 #define	DESC_HDR_MODE0_DEU_CBC		cpu_to_be32(0x00400000)