diff mbox

your mail

Message ID 20150421125049.GW12732@n2100.arm.linux.org.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Russell King - ARM Linux April 21, 2015, 12:50 p.m. UTC
On Tue, Apr 21, 2015 at 12:24:20PM +0100, Russell King - ARM Linux wrote:
> We should probably create a badr macro to complement the adr pseudo-op
> which incorporates the BSYM thing so make this clearer - and a comment
> before it.  This is really the case where BSYM should be used.

Something like this.  Note that I've also removed the BSYM() usage in
the KVM code.

 arch/arm/boot/compressed/head.S          |  4 ++--
 arch/arm/common/mcpm_head.S              |  2 +-
 arch/arm/include/asm/assembler.h         | 17 ++++++++++++++++-
 arch/arm/include/asm/entry-macro-multi.S |  4 ++--
 arch/arm/include/asm/unified.h           |  2 --
 arch/arm/kernel/entry-armv.S             | 12 ++++++------
 arch/arm/kernel/entry-common.S           |  6 +++---
 arch/arm/kernel/entry-ftrace.S           |  2 +-
 arch/arm/kernel/head-nommu.S             |  6 +++---
 arch/arm/kernel/head.S                   |  8 ++++----
 arch/arm/kernel/sleep.S                  |  2 +-
 arch/arm/kvm/interrupts.S                |  2 +-
 arch/arm/lib/call_with_stack.S           |  2 +-
 arch/arm/mm/proc-v7m.S                   |  2 +-
 14 files changed, 42 insertions(+), 29 deletions(-)

Comments

Ard Biesheuvel April 21, 2015, 1:10 p.m. UTC | #1
On 21 April 2015 at 14:50, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Tue, Apr 21, 2015 at 12:24:20PM +0100, Russell King - ARM Linux wrote:
>> We should probably create a badr macro to complement the adr pseudo-op
>> which incorporates the BSYM thing so make this clearer - and a comment
>> before it.  This is really the case where BSYM should be used.
>
> Something like this.  Note that I've also removed the BSYM() usage in
> the KVM code.
>

Yes, that is much better. It is a pity that we still can't use '| 1'
but the fact that you are forced to use 'adr' now probably mostly
eliminates the risk regarding that.

I did notice that are are 4 or 5 instances (commented inline) of an
ARM to thumb mode switch which can just as easily be implemented as
'blx 1f' instead of using this badr macro (whose use we want to
discourage, I assume, since the address arithmetic is still slightly
dodgy). Do you think we should do something about that as well?

>  arch/arm/boot/compressed/head.S          |  4 ++--
>  arch/arm/common/mcpm_head.S              |  2 +-
>  arch/arm/include/asm/assembler.h         | 17 ++++++++++++++++-
>  arch/arm/include/asm/entry-macro-multi.S |  4 ++--
>  arch/arm/include/asm/unified.h           |  2 --
>  arch/arm/kernel/entry-armv.S             | 12 ++++++------
>  arch/arm/kernel/entry-common.S           |  6 +++---
>  arch/arm/kernel/entry-ftrace.S           |  2 +-
>  arch/arm/kernel/head-nommu.S             |  6 +++---
>  arch/arm/kernel/head.S                   |  8 ++++----
>  arch/arm/kernel/sleep.S                  |  2 +-
>  arch/arm/kvm/interrupts.S                |  2 +-
>  arch/arm/lib/call_with_stack.S           |  2 +-
>  arch/arm/mm/proc-v7m.S                   |  2 +-
>  14 files changed, 42 insertions(+), 29 deletions(-)
>
> diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
> index 2c45b5709fa4..06e983f59980 100644
> --- a/arch/arm/boot/compressed/head.S
> +++ b/arch/arm/boot/compressed/head.S
> @@ -130,7 +130,7 @@ start:
>                 .endr
>     ARM(                mov     r0, r0          )
>     ARM(                b       1f              )
> - THUMB(                adr     r12, BSYM(1f)   )
> + THUMB(                badr    r12, 1f         )
>   THUMB(                bx      r12             )
>

blx 1f

>                 .word   _magic_sig      @ Magic numbers to help the loader
> @@ -447,7 +447,7 @@ dtb_check_done:
>
>                 bl      cache_clean_flush
>
> -               adr     r0, BSYM(restart)
> +               badr    r0, restart
>                 add     r0, r0, r6
>                 mov     pc, r0
>
> diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
> index e02db4b81a66..08b3bb9bc6a2 100644
> --- a/arch/arm/common/mcpm_head.S
> +++ b/arch/arm/common/mcpm_head.S
> @@ -49,7 +49,7 @@
>  ENTRY(mcpm_entry_point)
>
>   ARM_BE8(setend        be)
> - THUMB(        adr     r12, BSYM(1f)   )
> + THUMB(        badr    r12, 1f         )
>   THUMB(        bx      r12             )
>   THUMB(        .thumb                  )
>  1:

likewise

> [...]
> diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
> index 84da14b7cd04..c9660167ef1a 100644
> --- a/arch/arm/kernel/head-nommu.S
> +++ b/arch/arm/kernel/head-nommu.S
> @@ -46,7 +46,7 @@ ENTRY(stext)
>         .arm
>  ENTRY(stext)
>
> - THUMB(        adr     r9, BSYM(1f)    )       @ Kernel is always entered in ARM.
> + THUMB(        badr    r9, 1f          )       @ Kernel is always entered in ARM.
>   THUMB(        bx      r9              )       @ If this is a Thumb-2 kernel,
>   THUMB(        .thumb                  )       @ switch to Thumb now.
>   THUMB(1:                      )

likewise

> @@ -79,7 +79,7 @@ ENTRY(stext)
>  #endif
>         ldr     r13, =__mmap_switched           @ address to jump to after
>                                                 @ initialising sctlr
> -       adr     lr, BSYM(1f)                    @ return (PIC) address
> +       badr    lr, 1f                          @ return (PIC) address
>         ldr     r12, [r10, #PROCINFO_INITFUNC]
>         add     r12, r12, r10
>         ret     r12
> @@ -115,7 +115,7 @@ ENTRY(secondary_startup)
>         bl      __setup_mpu                     @ Initialize the MPU
>  #endif
>
> -       adr     lr, BSYM(__after_proc_init)     @ return address
> +       badr    lr, __after_proc_init           @ return address
>         mov     r13, r12                        @ __secondary_switched address
>         ldr     r12, [r10, #PROCINFO_INITFUNC]
>         add     r12, r12, r10
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index 7304b4c44b52..1eecd57453be 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -80,7 +80,7 @@
>  ENTRY(stext)
>   ARM_BE8(setend        be )                    @ ensure we are in BE8 mode
>
> - THUMB(        adr     r9, BSYM(1f)    )       @ Kernel is always entered in ARM.
> + THUMB(        badr    r9, 1f          )       @ Kernel is always entered in ARM.
>   THUMB(        bx      r9              )       @ If this is a Thumb-2 kernel,
>   THUMB(        .thumb                  )       @ switch to Thumb now.
>   THUMB(1:                      )

likewise

> @@ -148,7 +148,7 @@ ENTRY(stext)
>          */
>         ldr     r13, =__mmap_switched           @ address to jump to after
>                                                 @ mmu has been enabled
> -       adr     lr, BSYM(1f)                    @ return (PIC) address
> +       badr    lr, 1f                          @ return (PIC) address
>  #ifdef CONFIG_ARM_LPAE
>         mov     r5, #0                          @ high TTBR0
>         mov     r8, r4, lsr #12                 @ TTBR1 is swapper_pg_dir pfn
> @@ -364,7 +364,7 @@ __turn_mmu_on_loc:
>         .text
>  ENTRY(secondary_startup_arm)
>         .arm
> - THUMB(        adr     r9, BSYM(1f)    )       @ Kernel is entered in ARM.
> + THUMB(        badr    r9, 1f          )       @ Kernel is entered in ARM.
>   THUMB(        bx      r9              )       @ If this is a Thumb-2 kernel,
>   THUMB(        .thumb                  )       @ switch to Thumb now.
>   THUMB(1:                      )

likewise
Dave Martin April 21, 2015, 1:18 p.m. UTC | #2
On Tue, Apr 21, 2015 at 01:50:50PM +0100, Russell King - ARM Linux wrote:
> On Tue, Apr 21, 2015 at 12:24:20PM +0100, Russell King - ARM Linux wrote:
> > We should probably create a badr macro to complement the adr pseudo-op
> > which incorporates the BSYM thing so make this clearer - and a comment
> > before it.  This is really the case where BSYM should be used.
> 
> Something like this.  Note that I've also removed the BSYM() usage in
> the KVM code.

Nice.  Wrapping this around adr will make the assembler check that it's
not a cross-section reference too.

>  arch/arm/boot/compressed/head.S          |  4 ++--
>  arch/arm/common/mcpm_head.S              |  2 +-
>  arch/arm/include/asm/assembler.h         | 17 ++++++++++++++++-
>  arch/arm/include/asm/entry-macro-multi.S |  4 ++--
>  arch/arm/include/asm/unified.h           |  2 --
>  arch/arm/kernel/entry-armv.S             | 12 ++++++------
>  arch/arm/kernel/entry-common.S           |  6 +++---
>  arch/arm/kernel/entry-ftrace.S           |  2 +-
>  arch/arm/kernel/head-nommu.S             |  6 +++---
>  arch/arm/kernel/head.S                   |  8 ++++----
>  arch/arm/kernel/sleep.S                  |  2 +-
>  arch/arm/kvm/interrupts.S                |  2 +-
>  arch/arm/lib/call_with_stack.S           |  2 +-
>  arch/arm/mm/proc-v7m.S                   |  2 +-
>  14 files changed, 42 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
> index 2c45b5709fa4..06e983f59980 100644
> --- a/arch/arm/boot/compressed/head.S
> +++ b/arch/arm/boot/compressed/head.S
> @@ -130,7 +130,7 @@ start:
>                 .endr
>     ARM(                mov     r0, r0          )
>     ARM(                b       1f              )
> - THUMB(                adr     r12, BSYM(1f)   )
> + THUMB(                badr    r12, 1f         )
>   THUMB(                bx      r12             )
> 
>                 .word   _magic_sig      @ Magic numbers to help the loader
> @@ -447,7 +447,7 @@ dtb_check_done:
> 
>                 bl      cache_clean_flush
> 
> -               adr     r0, BSYM(restart)
> +               badr    r0, restart
>                 add     r0, r0, r6
>                 mov     pc, r0
> 
> diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
> index e02db4b81a66..08b3bb9bc6a2 100644
> --- a/arch/arm/common/mcpm_head.S
> +++ b/arch/arm/common/mcpm_head.S
> @@ -49,7 +49,7 @@
>  ENTRY(mcpm_entry_point)
> 
>   ARM_BE8(setend        be)
> - THUMB(        adr     r12, BSYM(1f)   )
> + THUMB(        badr    r12, 1f         )
>   THUMB(        bx      r12             )
>   THUMB(        .thumb                  )
>  1:
> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> index 186270b3e194..4abe57279c66 100644
> --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h
> @@ -178,6 +178,21 @@
>         .endm
> 
>  /*
> + * Assembly version of "adr rd, BSYM(sym)".  This should only be used to
> + * reference local symbols in the same assembly file which are to be
> + * resolved by the assembler.  Other usage is undefined.

BSYM() is gone, so this comment shouldn't refer to it...

> + */
> +       .irp    c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo

This wrap-macro-with-cc idiom could be factored, but it may not be worth
it just yet.

[...]

Cheers
---Dave
Dave Martin April 21, 2015, 1:21 p.m. UTC | #3
On Tue, Apr 21, 2015 at 02:10:42PM +0100, Ard Biesheuvel wrote:
> On 21 April 2015 at 14:50, Russell King - ARM Linux
> <linux@arm.linux.org.uk> wrote:
> > On Tue, Apr 21, 2015 at 12:24:20PM +0100, Russell King - ARM Linux wrote:
> >> We should probably create a badr macro to complement the adr pseudo-op
> >> which incorporates the BSYM thing so make this clearer - and a comment
> >> before it.  This is really the case where BSYM should be used.
> >
> > Something like this.  Note that I've also removed the BSYM() usage in
> > the KVM code.
> >
> 
> Yes, that is much better. It is a pity that we still can't use '| 1'
> but the fact that you are forced to use 'adr' now probably mostly
> eliminates the risk regarding that.
> 
> I did notice that are are 4 or 5 instances (commented inline) of an
> ARM to thumb mode switch which can just as easily be implemented as
> 'blx 1f' instead of using this badr macro (whose use we want to
> discourage, I assume, since the address arithmetic is still slightly
> dodgy). Do you think we should do something about that as well?

Err, probably.  That just looks like an oversight -- I think I'm
responsible for at least some of those.

There's no good reason not to replace adr+BSYM+bx.

For switches from ARM, this could be replaced with bx <label> which
should work just fine.  There shouldn't be any instances of this from
Thumb, because switching instruction set is the whole point here.

(Thumb doesn't have bx <label>, but blx <label> is available at the cost
of clobbering lr).

Cheers
---Dave

[...]
Ard Biesheuvel April 21, 2015, 1:28 p.m. UTC | #4
On 21 April 2015 at 15:21, Dave P Martin <Dave.Martin@arm.com> wrote:
> On Tue, Apr 21, 2015 at 02:10:42PM +0100, Ard Biesheuvel wrote:
>> On 21 April 2015 at 14:50, Russell King - ARM Linux
>> <linux@arm.linux.org.uk> wrote:
>> > On Tue, Apr 21, 2015 at 12:24:20PM +0100, Russell King - ARM Linux wrote:
>> >> We should probably create a badr macro to complement the adr pseudo-op
>> >> which incorporates the BSYM thing so make this clearer - and a comment
>> >> before it.  This is really the case where BSYM should be used.
>> >
>> > Something like this.  Note that I've also removed the BSYM() usage in
>> > the KVM code.
>> >
>>
>> Yes, that is much better. It is a pity that we still can't use '| 1'
>> but the fact that you are forced to use 'adr' now probably mostly
>> eliminates the risk regarding that.
>>
>> I did notice that are are 4 or 5 instances (commented inline) of an
>> ARM to thumb mode switch which can just as easily be implemented as
>> 'blx 1f' instead of using this badr macro (whose use we want to
>> discourage, I assume, since the address arithmetic is still slightly
>> dodgy). Do you think we should do something about that as well?
>
> Err, probably.  That just looks like an oversight -- I think I'm
> responsible for at least some of those.
>
> There's no good reason not to replace adr+BSYM+bx.
>
> For switches from ARM, this could be replaced with bx <label> which
> should work just fine.  There shouldn't be any instances of this from
> Thumb, because switching instruction set is the whole point here.
>
> (Thumb doesn't have bx <label>, but blx <label> is available at the cost
> of clobbering lr).
>

It appears neither have 'bx <label>', but 'add pc, pc, #1; nop' does
the job nicely as well.
And as you say, there are no instances of Thumb->ARM mode switches.
Russell King - ARM Linux April 21, 2015, 1:55 p.m. UTC | #5
On Tue, Apr 21, 2015 at 02:18:03PM +0100, Dave P Martin wrote:
> On Tue, Apr 21, 2015 at 01:50:50PM +0100, Russell King - ARM Linux wrote:
> > On Tue, Apr 21, 2015 at 12:24:20PM +0100, Russell King - ARM Linux wrote:
> > > We should probably create a badr macro to complement the adr pseudo-op
> > > which incorporates the BSYM thing so make this clearer - and a comment
> > > before it.  This is really the case where BSYM should be used.
> > 
> > Something like this.  Note that I've also removed the BSYM() usage in
> > the KVM code.
> 
> Nice.  Wrapping this around adr will make the assembler check that it's
> not a cross-section reference too.

While looking at this, I've become very upset with our toolchain's
abilities.  This is with stock binutils 2.22-2.25, and Ard's suggestion
for using blx:

00000000 <secondary_startup_arm>:
   0:   fafffffe        blx     4 <secondary_startup>

00000004 <secondary_startup>:
   4:   f7ff fffe       bl      0 <__hyp_stub_install_secondary>
   8:   f3ef 8900       mrs     r9, CPSR
   c:   f089 091a       eor.w   r9, r9, #26
  10:   f019 0f1f       tst.w   r9, #31

That's fine, but now if we look at the .head.text section (I also added
an ENTRY(start) to try and solve this):

00000000 <stext>:
   0:   ffff faff                       ; <UNDEFINED> instruction: 0xfffffaff

00000004 <start>:
   4:   fffef7ff        .word   0xfffef7ff
   8:   f3ef 8900       mrs     r9, CPSR
   c:   091af089        .word   0x091af089
  10:   f019 0f1f       tst.w   r9, #31
  14:   091ff029        .word   0x091ff029
  18:   09d3f049        .word   0x09d3f049
  1c:   f049 0920       orr.w   r9, r9, #32
  20:   f449d109        .word   0xf449d109
  24:   f20f7980        .word   0xf20f7980
  28:   0e13            lsrs    r3, r2, #24
  2a:   f399            .short  0xf399
  2c:   8f00            ldrh    r0, [r0, #56]   ; 0x38
  2e:   f38e            .short  0xf38e
  30:   f3de8e30        .word   0xf3de8e30
  34:   8f00            .short  0x8f00
  36:   f389 8100       msr     CPSR_c, r9

readelf for this shows for section 5:

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 5] .head.text        PROGBITS        00000000 000290 000254 00  AX  0   0  4
...
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     4: 00000000     0 SECTION LOCAL  DEFAULT    5
     5: 00000000     0 NOTYPE  LOCAL  DEFAULT    5 $a
     6: 00000004     0 NOTYPE  LOCAL  DEFAULT    5 $t
     7: 0000002e     0 NOTYPE  LOCAL  DEFAULT    5 $d
     8: 00000036     0 NOTYPE  LOCAL  DEFAULT    5 $t
...
    65: 00000000     4 FUNC    GLOBAL DEFAULT    5 stext
    66: 00000005   122 FUNC    GLOBAL DEFAULT    5 start

One has to wonder about the toolchain when the stupid $[adt] hack seems
to be going soooo wrong.

I think I'm going to stop working on this until we have a toolchain
which behaves sensibly... when you can't get the damned thing to
disassemble for confirmation purposes, its best to leave the damned
code alone.
Russell King - ARM Linux April 21, 2015, 2:05 p.m. UTC | #6
On Tue, Apr 21, 2015 at 02:21:46PM +0100, Dave P Martin wrote:
> On Tue, Apr 21, 2015 at 02:10:42PM +0100, Ard Biesheuvel wrote:
> > Yes, that is much better. It is a pity that we still can't use '| 1'
> > but the fact that you are forced to use 'adr' now probably mostly
> > eliminates the risk regarding that.
> > 
> > I did notice that are are 4 or 5 instances (commented inline) of an
> > ARM to thumb mode switch which can just as easily be implemented as
> > 'blx 1f' instead of using this badr macro (whose use we want to
> > discourage, I assume, since the address arithmetic is still slightly
> > dodgy). Do you think we should do something about that as well?
> 
> Err, probably.  That just looks like an oversight -- I think I'm
> responsible for at least some of those.
> 
> There's no good reason not to replace adr+BSYM+bx.
> 
> For switches from ARM, this could be replaced with bx <label> which
> should work just fine.  There shouldn't be any instances of this from
> Thumb, because switching instruction set is the whole point here.

Where do you get this bx <label> from?  It doesn't seem to be in
DDI0406C.
Ard Biesheuvel April 21, 2015, 2:06 p.m. UTC | #7
On 21 April 2015 at 15:55, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Tue, Apr 21, 2015 at 02:18:03PM +0100, Dave P Martin wrote:
>> On Tue, Apr 21, 2015 at 01:50:50PM +0100, Russell King - ARM Linux wrote:
>> > On Tue, Apr 21, 2015 at 12:24:20PM +0100, Russell King - ARM Linux wrote:
>> > > We should probably create a badr macro to complement the adr pseudo-op
>> > > which incorporates the BSYM thing so make this clearer - and a comment
>> > > before it.  This is really the case where BSYM should be used.
>> >
>> > Something like this.  Note that I've also removed the BSYM() usage in
>> > the KVM code.
>>
>> Nice.  Wrapping this around adr will make the assembler check that it's
>> not a cross-section reference too.
>
> While looking at this, I've become very upset with our toolchain's
> abilities.  This is with stock binutils 2.22-2.25, and Ard's suggestion
> for using blx:
>
> 00000000 <secondary_startup_arm>:
>    0:   fafffffe        blx     4 <secondary_startup>
>
> 00000004 <secondary_startup>:
>    4:   f7ff fffe       bl      0 <__hyp_stub_install_secondary>
>    8:   f3ef 8900       mrs     r9, CPSR
>    c:   f089 091a       eor.w   r9, r9, #26
>   10:   f019 0f1f       tst.w   r9, #31
>
> That's fine, but now if we look at the .head.text section (I also added
> an ENTRY(start) to try and solve this):
>
> 00000000 <stext>:
>    0:   ffff faff                       ; <UNDEFINED> instruction: 0xfffffaff
>
> 00000004 <start>:
>    4:   fffef7ff        .word   0xfffef7ff
>    8:   f3ef 8900       mrs     r9, CPSR
>    c:   091af089        .word   0x091af089
>   10:   f019 0f1f       tst.w   r9, #31
>   14:   091ff029        .word   0x091ff029
>   18:   09d3f049        .word   0x09d3f049
>   1c:   f049 0920       orr.w   r9, r9, #32
>   20:   f449d109        .word   0xf449d109
>   24:   f20f7980        .word   0xf20f7980
>   28:   0e13            lsrs    r3, r2, #24
>   2a:   f399            .short  0xf399
>   2c:   8f00            ldrh    r0, [r0, #56]   ; 0x38
>   2e:   f38e            .short  0xf38e
>   30:   f3de8e30        .word   0xf3de8e30
>   34:   8f00            .short  0x8f00
>   36:   f389 8100       msr     CPSR_c, r9
>
> readelf for this shows for section 5:
>
> Section Headers:
>   [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
>   [ 5] .head.text        PROGBITS        00000000 000290 000254 00  AX  0   0  4
> ...
>    Num:    Value  Size Type    Bind   Vis      Ndx Name
>      4: 00000000     0 SECTION LOCAL  DEFAULT    5
>      5: 00000000     0 NOTYPE  LOCAL  DEFAULT    5 $a
>      6: 00000004     0 NOTYPE  LOCAL  DEFAULT    5 $t
>      7: 0000002e     0 NOTYPE  LOCAL  DEFAULT    5 $d
>      8: 00000036     0 NOTYPE  LOCAL  DEFAULT    5 $t
> ...
>     65: 00000000     4 FUNC    GLOBAL DEFAULT    5 stext
>     66: 00000005   122 FUNC    GLOBAL DEFAULT    5 start
>
> One has to wonder about the toolchain when the stupid $[adt] hack seems
> to be going soooo wrong.
>
> I think I'm going to stop working on this until we have a toolchain
> which behaves sensibly... when you can't get the damned thing to
> disassemble for confirmation purposes, its best to leave the damned
> code alone.
>

It indeed seems to be objdump that is failing, but the code itself
looks fine to me.  For the record, binutils v2.23.52 works fine
Dave Martin April 21, 2015, 3:51 p.m. UTC | #8
On Tue, Apr 21, 2015 at 03:28:14PM +0200, Ard Biesheuvel wrote:
> On 21 April 2015 at 15:21, Dave P Martin <Dave.Martin@arm.com> wrote:
> > On Tue, Apr 21, 2015 at 02:10:42PM +0100, Ard Biesheuvel wrote:
> >> On 21 April 2015 at 14:50, Russell King - ARM Linux
> >> <linux@arm.linux.org.uk> wrote:
> >> > On Tue, Apr 21, 2015 at 12:24:20PM +0100, Russell King - ARM Linux wrote:
> >> >> We should probably create a badr macro to complement the adr pseudo-op
> >> >> which incorporates the BSYM thing so make this clearer - and a comment
> >> >> before it.  This is really the case where BSYM should be used.
> >> >
> >> > Something like this.  Note that I've also removed the BSYM() usage in
> >> > the KVM code.
> >> >
> >>
> >> Yes, that is much better. It is a pity that we still can't use '| 1'
> >> but the fact that you are forced to use 'adr' now probably mostly
> >> eliminates the risk regarding that.
> >>
> >> I did notice that are are 4 or 5 instances (commented inline) of an
> >> ARM to thumb mode switch which can just as easily be implemented as
> >> 'blx 1f' instead of using this badr macro (whose use we want to
> >> discourage, I assume, since the address arithmetic is still slightly
> >> dodgy). Do you think we should do something about that as well?
> >
> > Err, probably.  That just looks like an oversight -- I think I'm
> > responsible for at least some of those.
> >
> > There's no good reason not to replace adr+BSYM+bx.
> >
> > For switches from ARM, this could be replaced with bx <label> which
> > should work just fine.  There shouldn't be any instances of this from
> > Thumb, because switching instruction set is the whole point here.
> >
> > (Thumb doesn't have bx <label>, but blx <label> is available at the cost
> > of clobbering lr).
> >
> 
> It appears neither have 'bx <label>', but 'add pc, pc, #1; nop' does

Duh, I'm getting myself confused.  Yes, both have blx <label> but not
bx <label>.

Note that blx may result in suboptimal branch prediction because it
looks like a function call.  But these aren't hot paths, so I doubt
it matters.

> the job nicely as well.

Bit icky though...

Cheers
---Dave
Dave Martin April 21, 2015, 5:03 p.m. UTC | #9
On Tue, Apr 21, 2015 at 04:06:02PM +0200, Ard Biesheuvel wrote:
> On 21 April 2015 at 15:55, Russell King - ARM Linux
> <linux@arm.linux.org.uk> wrote:
> > On Tue, Apr 21, 2015 at 02:18:03PM +0100, Dave P Martin wrote:
> >> On Tue, Apr 21, 2015 at 01:50:50PM +0100, Russell King - ARM Linux wrote:
> >> > On Tue, Apr 21, 2015 at 12:24:20PM +0100, Russell King - ARM Linux wrote:
> >> > > We should probably create a badr macro to complement the adr pseudo-op
> >> > > which incorporates the BSYM thing so make this clearer - and a comment
> >> > > before it.  This is really the case where BSYM should be used.
> >> >
> >> > Something like this.  Note that I've also removed the BSYM() usage in
> >> > the KVM code.
> >>
> >> Nice.  Wrapping this around adr will make the assembler check that it's
> >> not a cross-section reference too.
> >
> > While looking at this, I've become very upset with our toolchain's
> > abilities.  This is with stock binutils 2.22-2.25, and Ard's suggestion
> > for using blx:
> >
> > 00000000 <secondary_startup_arm>:
> >    0:   fafffffe        blx     4 <secondary_startup>
> >
> > 00000004 <secondary_startup>:
> >    4:   f7ff fffe       bl      0 <__hyp_stub_install_secondary>
> >    8:   f3ef 8900       mrs     r9, CPSR
> >    c:   f089 091a       eor.w   r9, r9, #26
> >   10:   f019 0f1f       tst.w   r9, #31
> >
> > That's fine, but now if we look at the .head.text section (I also added
> > an ENTRY(start) to try and solve this):
> >
> > 00000000 <stext>:
> >    0:   ffff faff                       ; <UNDEFINED> instruction: 0xfffffaff
> >
> > 00000004 <start>:
> >    4:   fffef7ff        .word   0xfffef7ff
> >    8:   f3ef 8900       mrs     r9, CPSR
> >    c:   091af089        .word   0x091af089
> >   10:   f019 0f1f       tst.w   r9, #31
> >   14:   091ff029        .word   0x091ff029
> >   18:   09d3f049        .word   0x09d3f049
> >   1c:   f049 0920       orr.w   r9, r9, #32
> >   20:   f449d109        .word   0xf449d109
> >   24:   f20f7980        .word   0xf20f7980
> >   28:   0e13            lsrs    r3, r2, #24
> >   2a:   f399            .short  0xf399
> >   2c:   8f00            ldrh    r0, [r0, #56]   ; 0x38
> >   2e:   f38e            .short  0xf38e
> >   30:   f3de8e30        .word   0xf3de8e30
> >   34:   8f00            .short  0x8f00
> >   36:   f389 8100       msr     CPSR_c, r9
> >
> > readelf for this shows for section 5:
> >
> > Section Headers:
> >   [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
> >   [ 5] .head.text        PROGBITS        00000000 000290 000254 00  AX  0   0  4
> > ...
> >    Num:    Value  Size Type    Bind   Vis      Ndx Name
> >      4: 00000000     0 SECTION LOCAL  DEFAULT    5
> >      5: 00000000     0 NOTYPE  LOCAL  DEFAULT    5 $a
> >      6: 00000004     0 NOTYPE  LOCAL  DEFAULT    5 $t
> >      7: 0000002e     0 NOTYPE  LOCAL  DEFAULT    5 $d
> >      8: 00000036     0 NOTYPE  LOCAL  DEFAULT    5 $t
> > ...
> >     65: 00000000     4 FUNC    GLOBAL DEFAULT    5 stext
> >     66: 00000005   122 FUNC    GLOBAL DEFAULT    5 start
> >
> > One has to wonder about the toolchain when the stupid $[adt] hack seems
> > to be going soooo wrong.
> >
> > I think I'm going to stop working on this until we have a toolchain
> > which behaves sensibly... when you can't get the damned thing to
> > disassemble for confirmation purposes, its best to leave the damned
> > code alone.
> >
> 
> It indeed seems to be objdump that is failing, but the code itself
> looks fine to me.  For the record, binutils v2.23.52 works fine

I've come across weird disassembly issues like this in the past.
The objdump code is something of a fragmented mess (shock, horror),
but I think there were fixes in the pipeline for some issues of this
sort.

If it is possible to manually check most or all of the cases of
badr, that might be sufficient -- this only gets used in a few,
specific places.

Objdump doesn't inspire much confidence though :(

Cheers
---Dave
diff mbox

Patch

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 2c45b5709fa4..06e983f59980 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -130,7 +130,7 @@  start:
 		.endr
    ARM(		mov	r0, r0		)
    ARM(		b	1f		)
- THUMB(		adr	r12, BSYM(1f)	)
+ THUMB(		badr	r12, 1f		)
  THUMB(		bx	r12		)
 
 		.word	_magic_sig	@ Magic numbers to help the loader
@@ -447,7 +447,7 @@  dtb_check_done:
 
 		bl	cache_clean_flush
 
-		adr	r0, BSYM(restart)
+		badr	r0, restart
 		add	r0, r0, r6
 		mov	pc, r0
 
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index e02db4b81a66..08b3bb9bc6a2 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -49,7 +49,7 @@ 
 ENTRY(mcpm_entry_point)
 
  ARM_BE8(setend        be)
- THUMB(	adr	r12, BSYM(1f)	)
+ THUMB(	badr	r12, 1f		)
  THUMB(	bx	r12		)
  THUMB(	.thumb			)
 1:
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 186270b3e194..4abe57279c66 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -178,6 +178,21 @@ 
 	.endm
 
 /*
+ * Assembly version of "adr rd, BSYM(sym)".  This should only be used to
+ * reference local symbols in the same assembly file which are to be
+ * resolved by the assembler.  Other usage is undefined.
+ */
+	.irp	c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+	.macro	badr\c, rd, sym
+#ifdef CONFIG_THUMB2_KERNEL
+	adr\c	\rd, \sym + 1
+#else
+	adr\c	\rd, \sym
+#endif
+	.endm
+	.endr
+
+/*
  * Get current thread_info.
  */
 	.macro	get_thread_info, rd
@@ -326,7 +341,7 @@ 
 THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	bne	1f
 	orr	\reg, \reg, #PSR_A_BIT
-	adr	lr, BSYM(2f)
+	badr	lr, 2f
 	msr	spsr_cxsf, \reg
 	__MSR_ELR_HYP(14)
 	__ERET
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
index 469a2b30fa27..609184f522ee 100644
--- a/arch/arm/include/asm/entry-macro-multi.S
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -10,7 +10,7 @@ 
 	@
 	@ routine called with r0 = irq number, r1 = struct pt_regs *
 	@
-	adrne	lr, BSYM(1b)
+	badrne	lr, 1b
 	bne	asm_do_IRQ
 
 #ifdef CONFIG_SMP
@@ -23,7 +23,7 @@ 
 	ALT_SMP(test_for_ipi r0, r2, r6, lr)
 	ALT_UP_B(9997f)
 	movne	r1, sp
-	adrne	lr, BSYM(1b)
+	badrne	lr, 1b
 	bne	do_IPI
 #endif
 9997:
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index 200f9a7cd623..a91ae499614c 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -45,7 +45,6 @@ 
 #define THUMB(x...)	x
 #ifdef __ASSEMBLY__
 #define W(instr)	instr.w
-#define BSYM(sym)	sym + 1
 #else
 #define WASM(instr)	#instr ".w"
 #endif
@@ -59,7 +58,6 @@ 
 #define THUMB(x...)
 #ifdef __ASSEMBLY__
 #define W(instr)	instr
-#define BSYM(sym)	sym
 #else
 #define WASM(instr)	#instr
 #endif
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 570306c49406..f8f7398c74c2 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -40,7 +40,7 @@ 
 #ifdef CONFIG_MULTI_IRQ_HANDLER
 	ldr	r1, =handle_arch_irq
 	mov	r0, sp
-	adr	lr, BSYM(9997f)
+	badr	lr, 9997f
 	ldr	pc, [r1]
 #else
 	arch_irq_handler_default
@@ -273,7 +273,7 @@  __und_svc:
 	str	r4, [sp, #S_PC]
 	orr	r0, r9, r0, lsl #16
 #endif
-	adr	r9, BSYM(__und_svc_finish)
+	badr	r9, __und_svc_finish
 	mov	r2, r4
 	bl	call_fpe
 
@@ -469,7 +469,7 @@  __und_usr:
 	@ instruction, or the more conventional lr if we are to treat
 	@ this as a real undefined instruction
 	@
-	adr	r9, BSYM(ret_from_exception)
+	badr	r9, ret_from_exception
 
 	@ IRQs must be enabled before attempting to read the instruction from
 	@ user space since that could cause a page/translation fault if the
@@ -486,7 +486,7 @@  __und_usr:
 	@ r2 = PC value for the following instruction (:= regs->ARM_pc)
 	@ r4 = PC value for the faulting instruction
 	@ lr = 32-bit undefined instruction function
-	adr	lr, BSYM(__und_usr_fault_32)
+	badr	lr, __und_usr_fault_32
 	b	call_fpe
 
 __und_usr_thumb:
@@ -522,7 +522,7 @@  ARM_BE8(rev16	r0, r0)				@ little endian instruction
 	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
 	str	r2, [sp, #S_PC]			@ it's a 2x16bit instr, update
 	orr	r0, r0, r5, lsl #16
-	adr	lr, BSYM(__und_usr_fault_32)
+	badr	lr, __und_usr_fault_32
 	@ r0 = the two 16-bit Thumb instructions which caused the exception
 	@ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
 	@ r4 = PC value for the first 16-bit Thumb instruction
@@ -716,7 +716,7 @@  __und_usr_fault_32:
 __und_usr_fault_16:
 	mov	r1, #2
 1:	mov	r0, sp
-	adr	lr, BSYM(ret_from_exception)
+	badr	lr, ret_from_exception
 	b	__und_fault
 ENDPROC(__und_usr_fault_32)
 ENDPROC(__und_usr_fault_16)
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index f8ccc21fa032..6ab159384667 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -88,7 +88,7 @@  ENTRY(ret_from_fork)
 	bl	schedule_tail
 	cmp	r5, #0
 	movne	r0, r4
-	adrne	lr, BSYM(1f)
+	badrne	lr, 1f
 	retne	r5
 1:	get_thread_info tsk
 	b	ret_slow_syscall
@@ -196,7 +196,7 @@  local_restart:
 	bne	__sys_trace
 
 	cmp	scno, #NR_syscalls		@ check upper syscall limit
-	adr	lr, BSYM(ret_fast_syscall)	@ return address
+	badr	lr, ret_fast_syscall		@ return address
 	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
 
 	add	r1, sp, #S_OFF
@@ -231,7 +231,7 @@  __sys_trace:
 	add	r0, sp, #S_OFF
 	bl	syscall_trace_enter
 
-	adr	lr, BSYM(__sys_trace_return)	@ return address
+	badr	lr, __sys_trace_return		@ return address
 	mov	scno, r0			@ syscall number (possibly new)
 	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
 	cmp	scno, #NR_syscalls		@ check upper syscall limit
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
index fe57c73e70a4..c73c4030ca5d 100644
--- a/arch/arm/kernel/entry-ftrace.S
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -87,7 +87,7 @@ 
 
 1: 	mcount_get_lr	r1			@ lr of instrumented func
 	mcount_adjust_addr	r0, lr		@ instrumented function
-	adr	lr, BSYM(2f)
+	badr	lr, 2f
 	mov	pc, r2
 2:	mcount_exit
 .endm
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 84da14b7cd04..c9660167ef1a 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -46,7 +46,7 @@  ENTRY(stext)
 	.arm
 ENTRY(stext)
 
- THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	badr	r9, 1f		)	@ Kernel is always entered in ARM.
  THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
  THUMB(	.thumb			)	@ switch to Thumb now.
  THUMB(1:			)
@@ -79,7 +79,7 @@  ENTRY(stext)
 #endif
 	ldr	r13, =__mmap_switched		@ address to jump to after
 						@ initialising sctlr
-	adr	lr, BSYM(1f)			@ return (PIC) address
+	badr	lr, 1f				@ return (PIC) address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
 	ret	r12
@@ -115,7 +115,7 @@  ENTRY(secondary_startup)
 	bl      __setup_mpu			@ Initialize the MPU
 #endif
 
-	adr	lr, BSYM(__after_proc_init)	@ return address
+	badr	lr, __after_proc_init		@ return address
 	mov	r13, r12			@ __secondary_switched address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 7304b4c44b52..1eecd57453be 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -80,7 +80,7 @@ 
 ENTRY(stext)
  ARM_BE8(setend	be )			@ ensure we are in BE8 mode
 
- THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	badr	r9, 1f		)	@ Kernel is always entered in ARM.
  THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
  THUMB(	.thumb			)	@ switch to Thumb now.
  THUMB(1:			)
@@ -148,7 +148,7 @@  ENTRY(stext)
 	 */
 	ldr	r13, =__mmap_switched		@ address to jump to after
 						@ mmu has been enabled
-	adr	lr, BSYM(1f)			@ return (PIC) address
+	badr	lr, 1f				@ return (PIC) address
 #ifdef CONFIG_ARM_LPAE
 	mov	r5, #0				@ high TTBR0
 	mov	r8, r4, lsr #12			@ TTBR1 is swapper_pg_dir pfn
@@ -364,7 +364,7 @@  __turn_mmu_on_loc:
 	.text
 ENTRY(secondary_startup_arm)
 	.arm
- THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is entered in ARM.
+ THUMB(	badr	r9, 1f		)	@ Kernel is entered in ARM.
  THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
  THUMB(	.thumb			)	@ switch to Thumb now.
  THUMB(1:			)
@@ -400,7 +400,7 @@  ENTRY(secondary_startup)
 	add	r3, r7, lr
 	ldrd	r4, [r3, #0]			@ get secondary_data.pgdir
 	ldr	r8, [r3, #8]			@ get secondary_data.swapper_pg_dir
-	adr	lr, BSYM(__enable_mmu)		@ return address
+	badr	lr, __enable_mmu		@ return address
 	mov	r13, r12			@ __secondary_switched address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10			@ initialise processor
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 7d37bfc50830..76bb3128e135 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -81,7 +81,7 @@  ENTRY(__cpu_suspend)
 	mov	r1, r4			@ size of save block
 	add	r0, sp, #8		@ pointer to save block
 	bl	__cpu_suspend_save
-	adr	lr, BSYM(cpu_suspend_abort)
+	badr	lr, cpu_suspend_abort
 	ldmfd	sp!, {r0, pc}		@ call suspend fn
 ENDPROC(__cpu_suspend)
 	.ltorg
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 79caf79b304a..87847d2c5f99 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -309,7 +309,7 @@  ENTRY(kvm_call_hyp)
 THUMB(	orr	r2, r2, #PSR_T_BIT	)
 	msr	spsr_cxsf, r2
 	mrs	r1, ELR_hyp
-	ldr	r2, =BSYM(panic)
+	ldr	r2, =panic
 	msr	ELR_hyp, r2
 	ldr	r0, =\panic_str
 	clrex				@ Clear exclusive monitor
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
index ed1a421813cb..bf3a40889205 100644
--- a/arch/arm/lib/call_with_stack.S
+++ b/arch/arm/lib/call_with_stack.S
@@ -35,7 +35,7 @@  ENTRY(call_with_stack)
 	mov	r2, r0
 	mov	r0, r1
 
-	adr	lr, BSYM(1f)
+	badr	lr, 1f
 	ret	r2
 
 1:	ldr	lr, [sp]
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index e08e1f2bab76..67d9209077c6 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -98,7 +98,7 @@  __v7m_setup:
 	str	r5, [r0, V7M_SCB_SHPR3]	@ set PendSV priority
 
 	@ SVC to run the kernel in this mode
-	adr	r1, BSYM(1f)
+	badr	r1, 1f
 	ldr	r5, [r12, #11 * 4]	@ read the SVC vector entry
 	str	r1, [r12, #11 * 4]	@ write the temporary SVC vector entry
 	mov	r6, lr			@ save LR