Message ID | 20200915094619.32548-1-ardb@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Herbert Xu |
Headers | show |
Series | crypto: arm/sha256-neon - avoid ADRL pseudo instruction | expand |
On Tue, Sep 15, 2020 at 2:46 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > The ADRL pseudo instruction is not an architectural construct, but a > convenience macro that was supported by the ARM proprietary assembler > and adopted by binutils GAS as well, but only when assembling in 32-bit > ARM mode. Therefore, it can only be used in assembler code that is known > to assemble in ARM mode only, but as it turns out, the Clang assembler > does not implement ADRL at all, and so it is better to get rid of it > entirely. > > So replace the ADRL instruction with a ADR instruction that refers to > a nearer symbol, and apply the delta explicitly using an additional > instruction. > > Cc: Nick Desaulniers <ndesaulniers@google.com> > Cc: Stefan Agner <stefan@agner.ch> > Cc: Peter Smith <Peter.Smith@arm.com> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > --- > I will leave it to the Clang folks to decide whether this needs to be > backported and how far, but a Cc stable seems reasonable here. > > arch/arm/crypto/sha256-armv4.pl | 4 ++-- > arch/arm/crypto/sha256-core.S_shipped | 4 ++-- > 2 files changed, 4 insertions(+), 4 deletions(-) > > diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl > index 9f96ff48e4a8..8aeb2e82f915 100644 > --- a/arch/arm/crypto/sha256-armv4.pl > +++ b/arch/arm/crypto/sha256-armv4.pl > @@ -175,7 +175,6 @@ $code=<<___; > #else > .syntax unified > # ifdef __thumb2__ > -# define adrl adr > .thumb > # else > .code 32 > @@ -471,7 +470,8 @@ sha256_block_data_order_neon: > stmdb sp!,{r4-r12,lr} > > sub $H,sp,#16*4+16 > - adrl $Ktbl,K256 > + adr $Ktbl,.Lsha256_block_data_order > + add $Ktbl,$Ktbl,#K256-.Lsha256_block_data_order > bic $H,$H,#15 @ align for 128-bit stores > mov $t2,sp > mov sp,$H @ alloca > diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped > index ea04b2ab0c33..1861c4e8a5ba 100644 > --- a/arch/arm/crypto/sha256-core.S_shipped > +++ b/arch/arm/crypto/sha256-core.S_shipped > @@ -56,7 +56,6 @@ > #else > .syntax unified > # ifdef __thumb2__ > -# define adrl adr > .thumb > # else > .code 32 > @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: > stmdb sp!,{r4-r12,lr} > > sub r11,sp,#16*4+16 > - adrl r14,K256 > + adr r14,.Lsha256_block_data_order > + add r14,r14,#K256-.Lsha256_block_data_order Hi Ard, Thanks for the patch. With this patch applied: $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 -j71 defconfig $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 -j71 ... arch/arm/crypto/sha256-core.S:2038:2: error: out of range immediate fixup value add r14,r14,#K256-.Lsha256_block_data_order ^ :( Would the adr_l macro you wrote in https://lore.kernel.org/linux-arm-kernel/nycvar.YSQ.7.78.906.2009141003360.4095746@knanqh.ubzr/T/#t be helpful here? > bic r11,r11,#15 @ align for 128-bit stores > mov r12,sp > mov sp,r11 @ alloca > -- > 2.17.1 >
On Tue, 15 Sep 2020 at 21:50, Nick Desaulniers <ndesaulniers@google.com> wrote: > > On Tue, Sep 15, 2020 at 2:46 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > The ADRL pseudo instruction is not an architectural construct, but a > > convenience macro that was supported by the ARM proprietary assembler > > and adopted by binutils GAS as well, but only when assembling in 32-bit > > ARM mode. Therefore, it can only be used in assembler code that is known > > to assemble in ARM mode only, but as it turns out, the Clang assembler > > does not implement ADRL at all, and so it is better to get rid of it > > entirely. > > > > So replace the ADRL instruction with a ADR instruction that refers to > > a nearer symbol, and apply the delta explicitly using an additional > > instruction. > > > > Cc: Nick Desaulniers <ndesaulniers@google.com> > > Cc: Stefan Agner <stefan@agner.ch> > > Cc: Peter Smith <Peter.Smith@arm.com> > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > > --- > > I will leave it to the Clang folks to decide whether this needs to be > > backported and how far, but a Cc stable seems reasonable here. > > > > arch/arm/crypto/sha256-armv4.pl | 4 ++-- > > arch/arm/crypto/sha256-core.S_shipped | 4 ++-- > > 2 files changed, 4 insertions(+), 4 deletions(-) > > > > diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl > > index 9f96ff48e4a8..8aeb2e82f915 100644 > > --- a/arch/arm/crypto/sha256-armv4.pl > > +++ b/arch/arm/crypto/sha256-armv4.pl > > @@ -175,7 +175,6 @@ $code=<<___; > > #else > > .syntax unified > > # ifdef __thumb2__ > > -# define adrl adr > > .thumb > > # else > > .code 32 > > @@ -471,7 +470,8 @@ sha256_block_data_order_neon: > > stmdb sp!,{r4-r12,lr} > > > > sub $H,sp,#16*4+16 > > - adrl $Ktbl,K256 > > + adr $Ktbl,.Lsha256_block_data_order > > + add $Ktbl,$Ktbl,#K256-.Lsha256_block_data_order > > bic $H,$H,#15 @ align for 128-bit stores > > mov $t2,sp > > mov sp,$H @ alloca > > diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped > > index ea04b2ab0c33..1861c4e8a5ba 100644 > > --- a/arch/arm/crypto/sha256-core.S_shipped > > +++ b/arch/arm/crypto/sha256-core.S_shipped > > @@ -56,7 +56,6 @@ > > #else > > .syntax unified > > # ifdef __thumb2__ > > -# define adrl adr > > .thumb > > # else > > .code 32 > > @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: > > stmdb sp!,{r4-r12,lr} > > > > sub r11,sp,#16*4+16 > > - adrl r14,K256 > > + adr r14,.Lsha256_block_data_order > > + add r14,r14,#K256-.Lsha256_block_data_order > > Hi Ard, > Thanks for the patch. With this patch applied: > > $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 > -j71 defconfig > $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 -j71 > ... > arch/arm/crypto/sha256-core.S:2038:2: error: out of range immediate fixup value > add r14,r14,#K256-.Lsha256_block_data_order > ^ > > :( > Strange. Could you change it to sub r14,r14,#.Lsha256_block_data_order-K256 and try again? If that does work, it means the Clang assembler does not update the instruction type for negative addends (add to sub in this case), which would be unfortunate, since it would be another functionality gap. > Would the adr_l macro you wrote in > https://lore.kernel.org/linux-arm-kernel/nycvar.YSQ.7.78.906.2009141003360.4095746@knanqh.ubzr/T/#t > be helpful here? > > > bic r11,r11,#15 @ align for 128-bit stores > > mov r12,sp > > mov sp,r11 @ alloca > > -- > > 2.17.1 > > > > > -- > Thanks, > ~Nick Desaulniers
On Tue, Sep 15, 2020 at 2:32 PM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Tue, 15 Sep 2020 at 21:50, Nick Desaulniers <ndesaulniers@google.com> wrote: > > > > On Tue, Sep 15, 2020 at 2:46 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > The ADRL pseudo instruction is not an architectural construct, but a > > > convenience macro that was supported by the ARM proprietary assembler > > > and adopted by binutils GAS as well, but only when assembling in 32-bit > > > ARM mode. Therefore, it can only be used in assembler code that is known > > > to assemble in ARM mode only, but as it turns out, the Clang assembler > > > does not implement ADRL at all, and so it is better to get rid of it > > > entirely. > > > > > > So replace the ADRL instruction with a ADR instruction that refers to > > > a nearer symbol, and apply the delta explicitly using an additional > > > instruction. > > > > > > Cc: Nick Desaulniers <ndesaulniers@google.com> > > > Cc: Stefan Agner <stefan@agner.ch> > > > Cc: Peter Smith <Peter.Smith@arm.com> > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > > > --- > > > I will leave it to the Clang folks to decide whether this needs to be > > > backported and how far, but a Cc stable seems reasonable here. > > > > > > arch/arm/crypto/sha256-armv4.pl | 4 ++-- > > > arch/arm/crypto/sha256-core.S_shipped | 4 ++-- > > > 2 files changed, 4 insertions(+), 4 deletions(-) > > > > > > diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl > > > index 9f96ff48e4a8..8aeb2e82f915 100644 > > > --- a/arch/arm/crypto/sha256-armv4.pl > > > +++ b/arch/arm/crypto/sha256-armv4.pl > > > @@ -175,7 +175,6 @@ $code=<<___; > > > #else > > > .syntax unified > > > # ifdef __thumb2__ > > > -# define adrl adr > > > .thumb > > > # else > > > .code 32 > > > @@ -471,7 +470,8 @@ sha256_block_data_order_neon: > > > stmdb sp!,{r4-r12,lr} > > > > > > sub $H,sp,#16*4+16 > > > - adrl $Ktbl,K256 > > > + adr $Ktbl,.Lsha256_block_data_order > > > + add $Ktbl,$Ktbl,#K256-.Lsha256_block_data_order > > > bic $H,$H,#15 @ align for 128-bit stores > > > mov $t2,sp > > > mov sp,$H @ alloca > > > diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped > > > index ea04b2ab0c33..1861c4e8a5ba 100644 > > > --- a/arch/arm/crypto/sha256-core.S_shipped > > > +++ b/arch/arm/crypto/sha256-core.S_shipped > > > @@ -56,7 +56,6 @@ > > > #else > > > .syntax unified > > > # ifdef __thumb2__ > > > -# define adrl adr > > > .thumb > > > # else > > > .code 32 > > > @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: > > > stmdb sp!,{r4-r12,lr} > > > > > > sub r11,sp,#16*4+16 > > > - adrl r14,K256 > > > + adr r14,.Lsha256_block_data_order > > > + add r14,r14,#K256-.Lsha256_block_data_order > > > > Hi Ard, > > Thanks for the patch. With this patch applied: > > > > $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 > > -j71 defconfig > > $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 -j71 > > ... > > arch/arm/crypto/sha256-core.S:2038:2: error: out of range immediate fixup value > > add r14,r14,#K256-.Lsha256_block_data_order > > ^ > > > > :( > > > > Strange. Could you change it to > > sub r14,r14,#.Lsha256_block_data_order-K256 > > and try again? > > If that does work, it means the Clang assembler does not update the > instruction type for negative addends (add to sub in this case), which > would be unfortunate, since it would be another functionality gap. Works. Can you describe the expected functionality a bit more, so we can come up with a bug report/test case? (an `add` with a negative operand should be converted to a `sub` with a positive operand, IIUC?) Also, there's a similar adrl in arch/arm/crypto/sha512-core.S, err, is that generated?
On Wed, 16 Sep 2020 at 02:55, Nick Desaulniers <ndesaulniers@google.com> wrote: > > On Tue, Sep 15, 2020 at 2:32 PM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > On Tue, 15 Sep 2020 at 21:50, Nick Desaulniers <ndesaulniers@google.com> wrote: > > > > > > On Tue, Sep 15, 2020 at 2:46 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > > > The ADRL pseudo instruction is not an architectural construct, but a > > > > convenience macro that was supported by the ARM proprietary assembler > > > > and adopted by binutils GAS as well, but only when assembling in 32-bit > > > > ARM mode. Therefore, it can only be used in assembler code that is known > > > > to assemble in ARM mode only, but as it turns out, the Clang assembler > > > > does not implement ADRL at all, and so it is better to get rid of it > > > > entirely. > > > > > > > > So replace the ADRL instruction with a ADR instruction that refers to > > > > a nearer symbol, and apply the delta explicitly using an additional > > > > instruction. > > > > > > > > Cc: Nick Desaulniers <ndesaulniers@google.com> > > > > Cc: Stefan Agner <stefan@agner.ch> > > > > Cc: Peter Smith <Peter.Smith@arm.com> > > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > > > > --- > > > > I will leave it to the Clang folks to decide whether this needs to be > > > > backported and how far, but a Cc stable seems reasonable here. > > > > > > > > arch/arm/crypto/sha256-armv4.pl | 4 ++-- > > > > arch/arm/crypto/sha256-core.S_shipped | 4 ++-- > > > > 2 files changed, 4 insertions(+), 4 deletions(-) > > > > > > > > diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl > > > > index 9f96ff48e4a8..8aeb2e82f915 100644 > > > > --- a/arch/arm/crypto/sha256-armv4.pl > > > > +++ b/arch/arm/crypto/sha256-armv4.pl > > > > @@ -175,7 +175,6 @@ $code=<<___; > > > > #else > > > > .syntax unified > > > > # ifdef __thumb2__ > > > > -# define adrl adr > > > > .thumb > > > > # else > > > > .code 32 > > > > @@ -471,7 +470,8 @@ sha256_block_data_order_neon: > > > > stmdb sp!,{r4-r12,lr} > > > > > > > > sub $H,sp,#16*4+16 > > > > - adrl $Ktbl,K256 > > > > + adr $Ktbl,.Lsha256_block_data_order > > > > + add $Ktbl,$Ktbl,#K256-.Lsha256_block_data_order > > > > bic $H,$H,#15 @ align for 128-bit stores > > > > mov $t2,sp > > > > mov sp,$H @ alloca > > > > diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped > > > > index ea04b2ab0c33..1861c4e8a5ba 100644 > > > > --- a/arch/arm/crypto/sha256-core.S_shipped > > > > +++ b/arch/arm/crypto/sha256-core.S_shipped > > > > @@ -56,7 +56,6 @@ > > > > #else > > > > .syntax unified > > > > # ifdef __thumb2__ > > > > -# define adrl adr > > > > .thumb > > > > # else > > > > .code 32 > > > > @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: > > > > stmdb sp!,{r4-r12,lr} > > > > > > > > sub r11,sp,#16*4+16 > > > > - adrl r14,K256 > > > > + adr r14,.Lsha256_block_data_order > > > > + add r14,r14,#K256-.Lsha256_block_data_order > > > > > > Hi Ard, > > > Thanks for the patch. With this patch applied: > > > > > > $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 > > > -j71 defconfig > > > $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 -j71 > > > ... > > > arch/arm/crypto/sha256-core.S:2038:2: error: out of range immediate fixup value > > > add r14,r14,#K256-.Lsha256_block_data_order > > > ^ > > > > > > :( > > > > > > > Strange. Could you change it to > > > > sub r14,r14,#.Lsha256_block_data_order-K256 > > > > and try again? > > > > If that does work, it means the Clang assembler does not update the > > instruction type for negative addends (add to sub in this case), which > > would be unfortunate, since it would be another functionality gap. > > Works. Can you describe the expected functionality a bit more, so we > can come up with a bug report/test case? (an `add` with a negative > operand should be converted to a `sub` with a positive operand, IIUC?) > That is it, really. Not sure if this is laid out in a spec anywhere, although the ELF psABI for ARM covers some similar territory when it comes to turning add into sub instructions and vice versa, as well as manipulating the U bit of LDR instructions. > Also, there's a similar adrl in arch/arm/crypto/sha512-core.S, err, is > that generated? Indeed. I missed that one as it has been removed from the upstream OpenSSL version, but I'll add a fix there as well.
On 2020-09-15 23:31, Ard Biesheuvel wrote: > On Tue, 15 Sep 2020 at 21:50, Nick Desaulniers <ndesaulniers@google.com> wrote: >> >> On Tue, Sep 15, 2020 at 2:46 AM Ard Biesheuvel <ardb@kernel.org> wrote: >> > >> > The ADRL pseudo instruction is not an architectural construct, but a >> > convenience macro that was supported by the ARM proprietary assembler >> > and adopted by binutils GAS as well, but only when assembling in 32-bit >> > ARM mode. Therefore, it can only be used in assembler code that is known >> > to assemble in ARM mode only, but as it turns out, the Clang assembler >> > does not implement ADRL at all, and so it is better to get rid of it >> > entirely. >> > >> > So replace the ADRL instruction with a ADR instruction that refers to >> > a nearer symbol, and apply the delta explicitly using an additional >> > instruction. >> > >> > Cc: Nick Desaulniers <ndesaulniers@google.com> >> > Cc: Stefan Agner <stefan@agner.ch> >> > Cc: Peter Smith <Peter.Smith@arm.com> >> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> >> > --- >> > I will leave it to the Clang folks to decide whether this needs to be >> > backported and how far, but a Cc stable seems reasonable here. >> > >> > arch/arm/crypto/sha256-armv4.pl | 4 ++-- >> > arch/arm/crypto/sha256-core.S_shipped | 4 ++-- >> > 2 files changed, 4 insertions(+), 4 deletions(-) >> > >> > diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl >> > index 9f96ff48e4a8..8aeb2e82f915 100644 >> > --- a/arch/arm/crypto/sha256-armv4.pl >> > +++ b/arch/arm/crypto/sha256-armv4.pl >> > @@ -175,7 +175,6 @@ $code=<<___; >> > #else >> > .syntax unified >> > # ifdef __thumb2__ >> > -# define adrl adr >> > .thumb >> > # else >> > .code 32 >> > @@ -471,7 +470,8 @@ sha256_block_data_order_neon: >> > stmdb sp!,{r4-r12,lr} >> > >> > sub $H,sp,#16*4+16 >> > - adrl $Ktbl,K256 >> > + adr $Ktbl,.Lsha256_block_data_order >> > + add $Ktbl,$Ktbl,#K256-.Lsha256_block_data_order >> > bic $H,$H,#15 @ align for 128-bit stores >> > mov $t2,sp >> > mov sp,$H @ alloca >> > diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped >> > index ea04b2ab0c33..1861c4e8a5ba 100644 >> > --- a/arch/arm/crypto/sha256-core.S_shipped >> > +++ b/arch/arm/crypto/sha256-core.S_shipped >> > @@ -56,7 +56,6 @@ >> > #else >> > .syntax unified >> > # ifdef __thumb2__ >> > -# define adrl adr >> > .thumb >> > # else >> > .code 32 >> > @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: >> > stmdb sp!,{r4-r12,lr} >> > >> > sub r11,sp,#16*4+16 >> > - adrl r14,K256 >> > + adr r14,.Lsha256_block_data_order >> > + add r14,r14,#K256-.Lsha256_block_data_order >> >> Hi Ard, >> Thanks for the patch. With this patch applied: >> >> $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 >> -j71 defconfig >> $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 -j71 >> ... >> arch/arm/crypto/sha256-core.S:2038:2: error: out of range immediate fixup value >> add r14,r14,#K256-.Lsha256_block_data_order >> ^ >> >> :( >> > > Strange. Could you change it to > > sub r14,r14,#.Lsha256_block_data_order-K256 > > and try again? > > If that does work, it means the Clang assembler does not update the > instruction type for negative addends (add to sub in this case), which > would be unfortunate, since it would be another functionality gap. Hm interesting, I did not come across another instance where this was a problem. In this particular case, is it guaranteed to be a subtraction? I guess then using sub for now would be fine...? FWIW, we discussed possible solution also in this issue (mach-omap2/sleep34xx.S case is handled already): https://github.com/ClangBuiltLinux/linux/issues/430 -- Stefan > > > >> Would the adr_l macro you wrote in >> https://lore.kernel.org/linux-arm-kernel/nycvar.YSQ.7.78.906.2009141003360.4095746@knanqh.ubzr/T/#t >> be helpful here? >> >> > bic r11,r11,#15 @ align for 128-bit stores >> > mov r12,sp >> > mov sp,r11 @ alloca >> > -- >> > 2.17.1 >> > >> >> >> -- >> Thanks, >> ~Nick Desaulniers
On Wed, 16 Sep 2020 at 10:45, Stefan Agner <stefan@agner.ch> wrote: > > On 2020-09-15 23:31, Ard Biesheuvel wrote: > > On Tue, 15 Sep 2020 at 21:50, Nick Desaulniers <ndesaulniers@google.com> wrote: > >> > >> On Tue, Sep 15, 2020 at 2:46 AM Ard Biesheuvel <ardb@kernel.org> wrote: > >> > > >> > The ADRL pseudo instruction is not an architectural construct, but a > >> > convenience macro that was supported by the ARM proprietary assembler > >> > and adopted by binutils GAS as well, but only when assembling in 32-bit > >> > ARM mode. Therefore, it can only be used in assembler code that is known > >> > to assemble in ARM mode only, but as it turns out, the Clang assembler > >> > does not implement ADRL at all, and so it is better to get rid of it > >> > entirely. > >> > > >> > So replace the ADRL instruction with a ADR instruction that refers to > >> > a nearer symbol, and apply the delta explicitly using an additional > >> > instruction. > >> > > >> > Cc: Nick Desaulniers <ndesaulniers@google.com> > >> > Cc: Stefan Agner <stefan@agner.ch> > >> > Cc: Peter Smith <Peter.Smith@arm.com> > >> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > >> > --- > >> > I will leave it to the Clang folks to decide whether this needs to be > >> > backported and how far, but a Cc stable seems reasonable here. > >> > > >> > arch/arm/crypto/sha256-armv4.pl | 4 ++-- > >> > arch/arm/crypto/sha256-core.S_shipped | 4 ++-- > >> > 2 files changed, 4 insertions(+), 4 deletions(-) > >> > > >> > diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl > >> > index 9f96ff48e4a8..8aeb2e82f915 100644 > >> > --- a/arch/arm/crypto/sha256-armv4.pl > >> > +++ b/arch/arm/crypto/sha256-armv4.pl > >> > @@ -175,7 +175,6 @@ $code=<<___; > >> > #else > >> > .syntax unified > >> > # ifdef __thumb2__ > >> > -# define adrl adr > >> > .thumb > >> > # else > >> > .code 32 > >> > @@ -471,7 +470,8 @@ sha256_block_data_order_neon: > >> > stmdb sp!,{r4-r12,lr} > >> > > >> > sub $H,sp,#16*4+16 > >> > - adrl $Ktbl,K256 > >> > + adr $Ktbl,.Lsha256_block_data_order > >> > + add $Ktbl,$Ktbl,#K256-.Lsha256_block_data_order > >> > bic $H,$H,#15 @ align for 128-bit stores > >> > mov $t2,sp > >> > mov sp,$H @ alloca > >> > diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped > >> > index ea04b2ab0c33..1861c4e8a5ba 100644 > >> > --- a/arch/arm/crypto/sha256-core.S_shipped > >> > +++ b/arch/arm/crypto/sha256-core.S_shipped > >> > @@ -56,7 +56,6 @@ > >> > #else > >> > .syntax unified > >> > # ifdef __thumb2__ > >> > -# define adrl adr > >> > .thumb > >> > # else > >> > .code 32 > >> > @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: > >> > stmdb sp!,{r4-r12,lr} > >> > > >> > sub r11,sp,#16*4+16 > >> > - adrl r14,K256 > >> > + adr r14,.Lsha256_block_data_order > >> > + add r14,r14,#K256-.Lsha256_block_data_order > >> > >> Hi Ard, > >> Thanks for the patch. With this patch applied: > >> > >> $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 > >> -j71 defconfig > >> $ ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- make LLVM=1 LLVM_IAS=1 -j71 > >> ... > >> arch/arm/crypto/sha256-core.S:2038:2: error: out of range immediate fixup value > >> add r14,r14,#K256-.Lsha256_block_data_order > >> ^ > >> > >> :( > >> > > > > Strange. Could you change it to > > > > sub r14,r14,#.Lsha256_block_data_order-K256 > > > > and try again? > > > > If that does work, it means the Clang assembler does not update the > > instruction type for negative addends (add to sub in this case), which > > would be unfortunate, since it would be another functionality gap. > > Hm interesting, I did not come across another instance where this was a > problem. > > In this particular case, is it guaranteed to be a subtraction? I guess > then using sub for now would be fine...? > Yes for this code it is fine. > FWIW, we discussed possible solution also in this issue > (mach-omap2/sleep34xx.S case is handled already): > https://github.com/ClangBuiltLinux/linux/issues/430 > > -- > Stefan > > > > > > > > >> Would the adr_l macro you wrote in > >> https://lore.kernel.org/linux-arm-kernel/nycvar.YSQ.7.78.906.2009141003360.4095746@knanqh.ubzr/T/#t > >> be helpful here? > >> > >> > bic r11,r11,#15 @ align for 128-bit stores > >> > mov r12,sp > >> > mov sp,r11 @ alloca > >> > -- > >> > 2.17.1 > >> > > >> > >> > >> -- > >> Thanks, > >> ~Nick Desaulniers
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl index 9f96ff48e4a8..8aeb2e82f915 100644 --- a/arch/arm/crypto/sha256-armv4.pl +++ b/arch/arm/crypto/sha256-armv4.pl @@ -175,7 +175,6 @@ $code=<<___; #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -471,7 +470,8 @@ sha256_block_data_order_neon: stmdb sp!,{r4-r12,lr} sub $H,sp,#16*4+16 - adrl $Ktbl,K256 + adr $Ktbl,.Lsha256_block_data_order + add $Ktbl,$Ktbl,#K256-.Lsha256_block_data_order bic $H,$H,#15 @ align for 128-bit stores mov $t2,sp mov sp,$H @ alloca diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped index ea04b2ab0c33..1861c4e8a5ba 100644 --- a/arch/arm/crypto/sha256-core.S_shipped +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -56,7 +56,6 @@ #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: stmdb sp!,{r4-r12,lr} sub r11,sp,#16*4+16 - adrl r14,K256 + adr r14,.Lsha256_block_data_order + add r14,r14,#K256-.Lsha256_block_data_order bic r11,r11,#15 @ align for 128-bit stores mov r12,sp mov sp,r11 @ alloca
The ADRL pseudo instruction is not an architectural construct, but a convenience macro that was supported by the ARM proprietary assembler and adopted by binutils GAS as well, but only when assembling in 32-bit ARM mode. Therefore, it can only be used in assembler code that is known to assemble in ARM mode only, but as it turns out, the Clang assembler does not implement ADRL at all, and so it is better to get rid of it entirely. So replace the ADRL instruction with a ADR instruction that refers to a nearer symbol, and apply the delta explicitly using an additional instruction. Cc: Nick Desaulniers <ndesaulniers@google.com> Cc: Stefan Agner <stefan@agner.ch> Cc: Peter Smith <Peter.Smith@arm.com> Signed-off-by: Ard Biesheuvel <ardb@kernel.org> --- I will leave it to the Clang folks to decide whether this needs to be backported and how far, but a Cc stable seems reasonable here. arch/arm/crypto/sha256-armv4.pl | 4 ++-- arch/arm/crypto/sha256-core.S_shipped | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)