diff mbox

[1/3] perf tests: Introduce perf_regs_load function on ARM64

Message ID 1395222155-22205-2-git-send-email-jean.pihet@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Jean Pihet March 19, 2014, 9:42 a.m. UTC
Introducing perf_regs_load function, which is going
to be used for dwarf unwind test in following patches.

It takes single argument as a pointer to the regs dump
buffer and populates it with current registers values, as
expected by the perf built-in unwinding test.

Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
---
 tools/perf/arch/arm64/Makefile            |  1 +
 tools/perf/arch/arm64/include/perf_regs.h |  2 ++
 tools/perf/arch/arm64/tests/regs_load.S   | 39 +++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+)
 create mode 100644 tools/perf/arch/arm64/tests/regs_load.S

Comments

Mark Rutland March 21, 2014, 3:11 p.m. UTC | #1
Hi Jean,

On Wed, Mar 19, 2014 at 09:42:33AM +0000, Jean Pihet wrote:
> Introducing perf_regs_load function, which is going
> to be used for dwarf unwind test in following patches.
> 
> It takes single argument as a pointer to the regs dump
> buffer and populates it with current registers values, as
> expected by the perf built-in unwinding test.
> 
> Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
> Cc: Steve Capper <steve.capper@linaro.org>
> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
> Cc: David Ahern <dsahern@gmail.com>
> Cc: Jiri Olsa <jolsa@redhat.com>
> ---
>  tools/perf/arch/arm64/Makefile            |  1 +
>  tools/perf/arch/arm64/include/perf_regs.h |  2 ++
>  tools/perf/arch/arm64/tests/regs_load.S   | 39 +++++++++++++++++++++++++++++++
>  3 files changed, 42 insertions(+)
>  create mode 100644 tools/perf/arch/arm64/tests/regs_load.S
> 
> diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
> index 67e9b3d..9b8f87e 100644
> --- a/tools/perf/arch/arm64/Makefile
> +++ b/tools/perf/arch/arm64/Makefile
> @@ -4,4 +4,5 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
>  endif
>  ifndef NO_LIBUNWIND
>  LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
> +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
>  endif
> diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
> index 2359546..1e052f1 100644
> --- a/tools/perf/arch/arm64/include/perf_regs.h
> +++ b/tools/perf/arch/arm64/include/perf_regs.h
> @@ -9,6 +9,8 @@
>  #define PERF_REG_IP	PERF_REG_ARM64_PC
>  #define PERF_REG_SP	PERF_REG_ARM64_SP
>  
> +void perf_regs_load(u64 *regs);
> +
>  static inline const char *perf_reg_name(int id)
>  {
>  	switch (id) {
> diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
> new file mode 100644
> index 0000000..92ab968
> --- /dev/null
> +++ b/tools/perf/arch/arm64/tests/regs_load.S
> @@ -0,0 +1,39 @@
> +#include <linux/linkage.h>
> +
> +/*
> + * Implementation of void perf_regs_load(u64 *regs);
> + *
> + * This functions fills in the 'regs' buffer from the actual registers values,
> + * in the way the perf built-in unwinding test expects them:
> + * - the PC at the time at the call to this function. Since this function
> + *   is called using a bl instruction, the PC value is taken from LR,

Is it guaranteed that this function is always invoked with a branch with
link instruction, or is that just what current compiler versions are
doing? I couldn't see where we would get that guarantee from.

If it is called with a branch with link, then the LR value will be the
PC at call time + 4, rather than just the exact PC at call time. If not
then we don't have a guaranteed relationship between the PC at call time
and the current LR value.

If the only place that perf_regs_load is used is a single test which
doesn't care about the precise PC at the time of the call, then it's
probably OK to use the LR value, but we should be careful to document
what the faked-up PC actually is and how we expect it to be used.

> + * - the current SP (not touched by this function),
> + * - the current value of LR is merely retrieved and stored because the
> + *   value before the call to this function is unknown at this time; it will
> + *   be unwound from the dwarf information in unwind__get_entries.
> + */
> +
> +.text
> +.type perf_regs_load,%function
> +ENTRY(perf_regs_load)
> +	stp x0,  x1,  [x0], #16	// store x0..x29
> +	stp x2,  x3,  [x0], #16
> +	stp x4,  x5,  [x0], #16
> +	stp x6,  x7,  [x0], #16
> +	stp x8,  x9,  [x0], #16
> +	stp x10, x11, [x0], #16
> +	stp x12, x13, [x0], #16
> +	stp x14, x15, [x0], #16
> +	stp x16, x17, [x0], #16
> +	stp x18, x19, [x0], #16
> +	stp x20, x21, [x0], #16
> +	stp x22, x23, [x0], #16
> +	stp x24, x25, [x0], #16
> +	stp x26, x27, [x0], #16
> +	stp x28, x29, [x0], #16
> +	mov x1,  sp
> +	stp x30, x1,  [x0], #16	// store lr and sp
> +	str x30, [x0]		// store pc as lr in order to skip the call
> +				//  to this function

It might be better to word this a "store the lr in place of the pc". To
me at least the current wording implies the opposite of what the code
seems to be doing.

Cheers,
Mark.
Jean Pihet March 25, 2014, 3:23 p.m. UTC | #2
Hi Mark,

On 21 March 2014 16:11, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Jean,
>
> On Wed, Mar 19, 2014 at 09:42:33AM +0000, Jean Pihet wrote:
>> Introducing perf_regs_load function, which is going
>> to be used for dwarf unwind test in following patches.
>>
>> It takes single argument as a pointer to the regs dump
>> buffer and populates it with current registers values, as
>> expected by the perf built-in unwinding test.
>>
>> Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
>> Cc: Steve Capper <steve.capper@linaro.org>
>> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
>> Cc: Frederic Weisbecker <fweisbec@gmail.com>
>> Cc: Ingo Molnar <mingo@kernel.org>
>> Cc: Namhyung Kim <namhyung@kernel.org>
>> Cc: Paul Mackerras <paulus@samba.org>
>> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
>> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
>> Cc: David Ahern <dsahern@gmail.com>
>> Cc: Jiri Olsa <jolsa@redhat.com>
>> ---
>>  tools/perf/arch/arm64/Makefile            |  1 +
>>  tools/perf/arch/arm64/include/perf_regs.h |  2 ++
>>  tools/perf/arch/arm64/tests/regs_load.S   | 39 +++++++++++++++++++++++++++++++
>>  3 files changed, 42 insertions(+)
>>  create mode 100644 tools/perf/arch/arm64/tests/regs_load.S
>>
>> diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
>> index 67e9b3d..9b8f87e 100644
>> --- a/tools/perf/arch/arm64/Makefile
>> +++ b/tools/perf/arch/arm64/Makefile
>> @@ -4,4 +4,5 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
>>  endif
>>  ifndef NO_LIBUNWIND
>>  LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
>> +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
>>  endif
>> diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
>> index 2359546..1e052f1 100644
>> --- a/tools/perf/arch/arm64/include/perf_regs.h
>> +++ b/tools/perf/arch/arm64/include/perf_regs.h
>> @@ -9,6 +9,8 @@
>>  #define PERF_REG_IP  PERF_REG_ARM64_PC
>>  #define PERF_REG_SP  PERF_REG_ARM64_SP
>>
>> +void perf_regs_load(u64 *regs);
>> +
>>  static inline const char *perf_reg_name(int id)
>>  {
>>       switch (id) {
>> diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
>> new file mode 100644
>> index 0000000..92ab968
>> --- /dev/null
>> +++ b/tools/perf/arch/arm64/tests/regs_load.S
>> @@ -0,0 +1,39 @@
>> +#include <linux/linkage.h>
>> +
>> +/*
>> + * Implementation of void perf_regs_load(u64 *regs);
>> + *
>> + * This functions fills in the 'regs' buffer from the actual registers values,
>> + * in the way the perf built-in unwinding test expects them:
>> + * - the PC at the time at the call to this function. Since this function
>> + *   is called using a bl instruction, the PC value is taken from LR,
>
> Is it guaranteed that this function is always invoked with a branch with
> link instruction, or is that just what current compiler versions are
> doing? I couldn't see where we would get that guarantee from.
The current compiler implements the call as a bl instruction.

> If it is called with a branch with link, then the LR value will be the
> PC at call time + 4, rather than just the exact PC at call time. If not
> then we don't have a guaranteed relationship between the PC at call time
> and the current LR value.
>
> If the only place that perf_regs_load is used is a single test which
> doesn't care about the precise PC at the time of the call, then it's
> probably OK to use the LR value, but we should be careful to document
> what the faked-up PC actually is and how we expect it to be used.
The code is only used by an unwinding test. The unwinding code
resolves the function name from an address range found in the dwarf
information so in principle it is ok to use the PC/LR at the time of
the call to a function.

Is the comment above OK or do you want an update of the code as well?

>
>> + * - the current SP (not touched by this function),
>> + * - the current value of LR is merely retrieved and stored because the
>> + *   value before the call to this function is unknown at this time; it will
>> + *   be unwound from the dwarf information in unwind__get_entries.
>> + */
>> +
>> +.text
>> +.type perf_regs_load,%function
>> +ENTRY(perf_regs_load)
>> +     stp x0,  x1,  [x0], #16 // store x0..x29
>> +     stp x2,  x3,  [x0], #16
>> +     stp x4,  x5,  [x0], #16
>> +     stp x6,  x7,  [x0], #16
>> +     stp x8,  x9,  [x0], #16
>> +     stp x10, x11, [x0], #16
>> +     stp x12, x13, [x0], #16
>> +     stp x14, x15, [x0], #16
>> +     stp x16, x17, [x0], #16
>> +     stp x18, x19, [x0], #16
>> +     stp x20, x21, [x0], #16
>> +     stp x22, x23, [x0], #16
>> +     stp x24, x25, [x0], #16
>> +     stp x26, x27, [x0], #16
>> +     stp x28, x29, [x0], #16
>> +     mov x1,  sp
>> +     stp x30, x1,  [x0], #16 // store lr and sp
>> +     str x30, [x0]           // store pc as lr in order to skip the call
>> +                             //  to this function
>
> It might be better to word this a "store the lr in place of the pc". To
> me at least the current wording implies the opposite of what the code
> seems to be doing.
Ok the last comment can be updated.

Thanks!
Jean

>
> Cheers,
> Mark.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Jean Pihet April 4, 2014, 7:51 a.m. UTC | #3
Hi Mark,

ping on this series, see comment below.

On 25 March 2014 16:23, Jean Pihet <jean.pihet@linaro.org> wrote:
> Hi Mark,
>
> On 21 March 2014 16:11, Mark Rutland <mark.rutland@arm.com> wrote:
>> Hi Jean,
>>
>> On Wed, Mar 19, 2014 at 09:42:33AM +0000, Jean Pihet wrote:
>>> Introducing perf_regs_load function, which is going
>>> to be used for dwarf unwind test in following patches.
>>>
>>> It takes single argument as a pointer to the regs dump
>>> buffer and populates it with current registers values, as
>>> expected by the perf built-in unwinding test.
>>>
>>> Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
>>> Cc: Steve Capper <steve.capper@linaro.org>
>>> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
>>> Cc: Frederic Weisbecker <fweisbec@gmail.com>
>>> Cc: Ingo Molnar <mingo@kernel.org>
>>> Cc: Namhyung Kim <namhyung@kernel.org>
>>> Cc: Paul Mackerras <paulus@samba.org>
>>> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
>>> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
>>> Cc: David Ahern <dsahern@gmail.com>
>>> Cc: Jiri Olsa <jolsa@redhat.com>
>>> ---
>>>  tools/perf/arch/arm64/Makefile            |  1 +
>>>  tools/perf/arch/arm64/include/perf_regs.h |  2 ++
>>>  tools/perf/arch/arm64/tests/regs_load.S   | 39 +++++++++++++++++++++++++++++++
>>>  3 files changed, 42 insertions(+)
>>>  create mode 100644 tools/perf/arch/arm64/tests/regs_load.S
>>>
>>> diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
>>> index 67e9b3d..9b8f87e 100644
>>> --- a/tools/perf/arch/arm64/Makefile
>>> +++ b/tools/perf/arch/arm64/Makefile
>>> @@ -4,4 +4,5 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
>>>  endif
>>>  ifndef NO_LIBUNWIND
>>>  LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
>>> +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
>>>  endif
>>> diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
>>> index 2359546..1e052f1 100644
>>> --- a/tools/perf/arch/arm64/include/perf_regs.h
>>> +++ b/tools/perf/arch/arm64/include/perf_regs.h
>>> @@ -9,6 +9,8 @@
>>>  #define PERF_REG_IP  PERF_REG_ARM64_PC
>>>  #define PERF_REG_SP  PERF_REG_ARM64_SP
>>>
>>> +void perf_regs_load(u64 *regs);
>>> +
>>>  static inline const char *perf_reg_name(int id)
>>>  {
>>>       switch (id) {
>>> diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
>>> new file mode 100644
>>> index 0000000..92ab968
>>> --- /dev/null
>>> +++ b/tools/perf/arch/arm64/tests/regs_load.S
>>> @@ -0,0 +1,39 @@
>>> +#include <linux/linkage.h>
>>> +
>>> +/*
>>> + * Implementation of void perf_regs_load(u64 *regs);
>>> + *
>>> + * This functions fills in the 'regs' buffer from the actual registers values,
>>> + * in the way the perf built-in unwinding test expects them:
>>> + * - the PC at the time at the call to this function. Since this function
>>> + *   is called using a bl instruction, the PC value is taken from LR,
>>
>> Is it guaranteed that this function is always invoked with a branch with
>> link instruction, or is that just what current compiler versions are
>> doing? I couldn't see where we would get that guarantee from.
> The current compiler implements the call as a bl instruction.
>
>> If it is called with a branch with link, then the LR value will be the
>> PC at call time + 4, rather than just the exact PC at call time. If not
>> then we don't have a guaranteed relationship between the PC at call time
>> and the current LR value.
>>
>> If the only place that perf_regs_load is used is a single test which
>> doesn't care about the precise PC at the time of the call, then it's
>> probably OK to use the LR value, but we should be careful to document
>> what the faked-up PC actually is and how we expect it to be used.
> The code is only used by an unwinding test. The unwinding code
> resolves the function name from an address range found in the dwarf
> information so in principle it is ok to use the PC/LR at the time of
> the call to a function.
>
> Is the comment above OK or do you want an update of the code as well?
What do you think?

Regards,
Jean

>
>>
>>> + * - the current SP (not touched by this function),
>>> + * - the current value of LR is merely retrieved and stored because the
>>> + *   value before the call to this function is unknown at this time; it will
>>> + *   be unwound from the dwarf information in unwind__get_entries.
>>> + */
>>> +
>>> +.text
>>> +.type perf_regs_load,%function
>>> +ENTRY(perf_regs_load)
>>> +     stp x0,  x1,  [x0], #16 // store x0..x29
>>> +     stp x2,  x3,  [x0], #16
>>> +     stp x4,  x5,  [x0], #16
>>> +     stp x6,  x7,  [x0], #16
>>> +     stp x8,  x9,  [x0], #16
>>> +     stp x10, x11, [x0], #16
>>> +     stp x12, x13, [x0], #16
>>> +     stp x14, x15, [x0], #16
>>> +     stp x16, x17, [x0], #16
>>> +     stp x18, x19, [x0], #16
>>> +     stp x20, x21, [x0], #16
>>> +     stp x22, x23, [x0], #16
>>> +     stp x24, x25, [x0], #16
>>> +     stp x26, x27, [x0], #16
>>> +     stp x28, x29, [x0], #16
>>> +     mov x1,  sp
>>> +     stp x30, x1,  [x0], #16 // store lr and sp
>>> +     str x30, [x0]           // store pc as lr in order to skip the call
>>> +                             //  to this function
>>
>> It might be better to word this a "store the lr in place of the pc". To
>> me at least the current wording implies the opposite of what the code
>> seems to be doing.
> Ok the last comment can be updated.
>
> Thanks!
> Jean
>
>>
>> Cheers,
>> Mark.
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
Jean Pihet April 22, 2014, 8:13 a.m. UTC | #4
Hi Mark, Will,

Ping on this series. Can you please check? I

Regards,
Jean


On 25 March 2014 16:23, Jean Pihet <jean.pihet@linaro.org> wrote:
> Hi Mark,
>
> On 21 March 2014 16:11, Mark Rutland <mark.rutland@arm.com> wrote:
>> Hi Jean,
>>
>> On Wed, Mar 19, 2014 at 09:42:33AM +0000, Jean Pihet wrote:
>>> Introducing perf_regs_load function, which is going
>>> to be used for dwarf unwind test in following patches.
>>>
>>> It takes single argument as a pointer to the regs dump
>>> buffer and populates it with current registers values, as
>>> expected by the perf built-in unwinding test.
>>>
>>> Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
>>> Cc: Steve Capper <steve.capper@linaro.org>
>>> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
>>> Cc: Frederic Weisbecker <fweisbec@gmail.com>
>>> Cc: Ingo Molnar <mingo@kernel.org>
>>> Cc: Namhyung Kim <namhyung@kernel.org>
>>> Cc: Paul Mackerras <paulus@samba.org>
>>> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
>>> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
>>> Cc: David Ahern <dsahern@gmail.com>
>>> Cc: Jiri Olsa <jolsa@redhat.com>
>>> ---
>>>  tools/perf/arch/arm64/Makefile            |  1 +
>>>  tools/perf/arch/arm64/include/perf_regs.h |  2 ++
>>>  tools/perf/arch/arm64/tests/regs_load.S   | 39 +++++++++++++++++++++++++++++++
>>>  3 files changed, 42 insertions(+)
>>>  create mode 100644 tools/perf/arch/arm64/tests/regs_load.S
>>>
>>> diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
>>> index 67e9b3d..9b8f87e 100644
>>> --- a/tools/perf/arch/arm64/Makefile
>>> +++ b/tools/perf/arch/arm64/Makefile
>>> @@ -4,4 +4,5 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
>>>  endif
>>>  ifndef NO_LIBUNWIND
>>>  LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
>>> +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
>>>  endif
>>> diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
>>> index 2359546..1e052f1 100644
>>> --- a/tools/perf/arch/arm64/include/perf_regs.h
>>> +++ b/tools/perf/arch/arm64/include/perf_regs.h
>>> @@ -9,6 +9,8 @@
>>>  #define PERF_REG_IP  PERF_REG_ARM64_PC
>>>  #define PERF_REG_SP  PERF_REG_ARM64_SP
>>>
>>> +void perf_regs_load(u64 *regs);
>>> +
>>>  static inline const char *perf_reg_name(int id)
>>>  {
>>>       switch (id) {
>>> diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
>>> new file mode 100644
>>> index 0000000..92ab968
>>> --- /dev/null
>>> +++ b/tools/perf/arch/arm64/tests/regs_load.S
>>> @@ -0,0 +1,39 @@
>>> +#include <linux/linkage.h>
>>> +
>>> +/*
>>> + * Implementation of void perf_regs_load(u64 *regs);
>>> + *
>>> + * This functions fills in the 'regs' buffer from the actual registers values,
>>> + * in the way the perf built-in unwinding test expects them:
>>> + * - the PC at the time at the call to this function. Since this function
>>> + *   is called using a bl instruction, the PC value is taken from LR,
>>
>> Is it guaranteed that this function is always invoked with a branch with
>> link instruction, or is that just what current compiler versions are
>> doing? I couldn't see where we would get that guarantee from.
> The current compiler implements the call as a bl instruction.
>
>> If it is called with a branch with link, then the LR value will be the
>> PC at call time + 4, rather than just the exact PC at call time. If not
>> then we don't have a guaranteed relationship between the PC at call time
>> and the current LR value.
>>
>> If the only place that perf_regs_load is used is a single test which
>> doesn't care about the precise PC at the time of the call, then it's
>> probably OK to use the LR value, but we should be careful to document
>> what the faked-up PC actually is and how we expect it to be used.
> The code is only used by an unwinding test. The unwinding code
> resolves the function name from an address range found in the dwarf
> information so in principle it is ok to use the PC/LR at the time of
> the call to a function.
>
> Is the comment above OK or do you want an update of the code as well?
>
>>
>>> + * - the current SP (not touched by this function),
>>> + * - the current value of LR is merely retrieved and stored because the
>>> + *   value before the call to this function is unknown at this time; it will
>>> + *   be unwound from the dwarf information in unwind__get_entries.
>>> + */
>>> +
>>> +.text
>>> +.type perf_regs_load,%function
>>> +ENTRY(perf_regs_load)
>>> +     stp x0,  x1,  [x0], #16 // store x0..x29
>>> +     stp x2,  x3,  [x0], #16
>>> +     stp x4,  x5,  [x0], #16
>>> +     stp x6,  x7,  [x0], #16
>>> +     stp x8,  x9,  [x0], #16
>>> +     stp x10, x11, [x0], #16
>>> +     stp x12, x13, [x0], #16
>>> +     stp x14, x15, [x0], #16
>>> +     stp x16, x17, [x0], #16
>>> +     stp x18, x19, [x0], #16
>>> +     stp x20, x21, [x0], #16
>>> +     stp x22, x23, [x0], #16
>>> +     stp x24, x25, [x0], #16
>>> +     stp x26, x27, [x0], #16
>>> +     stp x28, x29, [x0], #16
>>> +     mov x1,  sp
>>> +     stp x30, x1,  [x0], #16 // store lr and sp
>>> +     str x30, [x0]           // store pc as lr in order to skip the call
>>> +                             //  to this function
>>
>> It might be better to word this a "store the lr in place of the pc". To
>> me at least the current wording implies the opposite of what the code
>> seems to be doing.
> Ok the last comment can be updated.
>
> Thanks!
> Jean
>
>>
>> Cheers,
>> Mark.
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
Will Deacon April 22, 2014, 10:37 a.m. UTC | #5
On Tue, Apr 22, 2014 at 09:13:33AM +0100, Jean Pihet wrote:
> Hi Mark, Will,

Hi Jean,

> Ping on this series. Can you please check? I

Do you have a pointer to the latest version of your code please? The email
backlog I have seems to end with MarkR saying he would take a look.

Will
Mark Rutland April 22, 2014, 1:24 p.m. UTC | #6
On Tue, Apr 22, 2014 at 11:37:44AM +0100, Will Deacon wrote:
> On Tue, Apr 22, 2014 at 09:13:33AM +0100, Jean Pihet wrote:
> > Hi Mark, Will,
> 
> Hi Jean,
> 
> > Ping on this series. Can you please check? I
> 
> Do you have a pointer to the latest version of your code please? The email
> backlog I have seems to end with MarkR saying he would take a look.

The last posting I saw was [1-6]. The lack of reply is my fault, as I
lost track of the thread.

Cheers,
Mark.

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-March/241470.html
[2] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-March/241467.html
[3] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-March/242396.html
[4] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-March/242998.html
[5] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-March/241466.html
[6] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-March/241468.html
Mark Rutland April 22, 2014, 1:42 p.m. UTC | #7
Hi Jean,

Apologies for the delay on this.

On Tue, Mar 25, 2014 at 03:23:26PM +0000, Jean Pihet wrote:
> Hi Mark,
> 
> On 21 March 2014 16:11, Mark Rutland <mark.rutland@arm.com> wrote:
> > Hi Jean,
> >
> > On Wed, Mar 19, 2014 at 09:42:33AM +0000, Jean Pihet wrote:
> >> Introducing perf_regs_load function, which is going
> >> to be used for dwarf unwind test in following patches.
> >>
> >> It takes single argument as a pointer to the regs dump
> >> buffer and populates it with current registers values, as
> >> expected by the perf built-in unwinding test.
> >>
> >> Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
> >> Cc: Steve Capper <steve.capper@linaro.org>
> >> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
> >> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> >> Cc: Ingo Molnar <mingo@kernel.org>
> >> Cc: Namhyung Kim <namhyung@kernel.org>
> >> Cc: Paul Mackerras <paulus@samba.org>
> >> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> >> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
> >> Cc: David Ahern <dsahern@gmail.com>
> >> Cc: Jiri Olsa <jolsa@redhat.com>
> >> ---
> >>  tools/perf/arch/arm64/Makefile            |  1 +
> >>  tools/perf/arch/arm64/include/perf_regs.h |  2 ++
> >>  tools/perf/arch/arm64/tests/regs_load.S   | 39 +++++++++++++++++++++++++++++++
> >>  3 files changed, 42 insertions(+)
> >>  create mode 100644 tools/perf/arch/arm64/tests/regs_load.S
> >>
> >> diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
> >> index 67e9b3d..9b8f87e 100644
> >> --- a/tools/perf/arch/arm64/Makefile
> >> +++ b/tools/perf/arch/arm64/Makefile
> >> @@ -4,4 +4,5 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
> >>  endif
> >>  ifndef NO_LIBUNWIND
> >>  LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
> >> +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
> >>  endif
> >> diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
> >> index 2359546..1e052f1 100644
> >> --- a/tools/perf/arch/arm64/include/perf_regs.h
> >> +++ b/tools/perf/arch/arm64/include/perf_regs.h
> >> @@ -9,6 +9,8 @@
> >>  #define PERF_REG_IP  PERF_REG_ARM64_PC
> >>  #define PERF_REG_SP  PERF_REG_ARM64_SP
> >>
> >> +void perf_regs_load(u64 *regs);
> >> +
> >>  static inline const char *perf_reg_name(int id)
> >>  {
> >>       switch (id) {
> >> diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
> >> new file mode 100644
> >> index 0000000..92ab968
> >> --- /dev/null
> >> +++ b/tools/perf/arch/arm64/tests/regs_load.S
> >> @@ -0,0 +1,39 @@
> >> +#include <linux/linkage.h>
> >> +
> >> +/*
> >> + * Implementation of void perf_regs_load(u64 *regs);
> >> + *
> >> + * This functions fills in the 'regs' buffer from the actual registers values,
> >> + * in the way the perf built-in unwinding test expects them:
> >> + * - the PC at the time at the call to this function. Since this function
> >> + *   is called using a bl instruction, the PC value is taken from LR,
> >
> > Is it guaranteed that this function is always invoked with a branch with
> > link instruction, or is that just what current compiler versions are
> > doing? I couldn't see where we would get that guarantee from.
> The current compiler implements the call as a bl instruction.

While I don't think we can rely on the compiler using a bl to call the
function it shouldn't matter here if we only care about the LR value
being an address within the caller, as it doesn't look amenable to tail
call optimization.

> > If it is called with a branch with link, then the LR value will be the
> > PC at call time + 4, rather than just the exact PC at call time. If not
> > then we don't have a guaranteed relationship between the PC at call time
> > and the current LR value.
> >
> > If the only place that perf_regs_load is used is a single test which
> > doesn't care about the precise PC at the time of the call, then it's
> > probably OK to use the LR value, but we should be careful to document
> > what the faked-up PC actually is and how we expect it to be used.
> The code is only used by an unwinding test. The unwinding code
> resolves the function name from an address range found in the dwarf
> information so in principle it is ok to use the PC/LR at the time of
> the call to a function.
> 
> Is the comment above OK or do you want an update of the code as well?

If we just need an (arbitrary) address within the caller, a comment
update should be fine.

> >> + * - the current SP (not touched by this function),
> >> + * - the current value of LR is merely retrieved and stored because the
> >> + *   value before the call to this function is unknown at this time; it will
> >> + *   be unwound from the dwarf information in unwind__get_entries.
> >> + */
> >> +
> >> +.text
> >> +.type perf_regs_load,%function
> >> +ENTRY(perf_regs_load)
> >> +     stp x0,  x1,  [x0], #16 // store x0..x29
> >> +     stp x2,  x3,  [x0], #16
> >> +     stp x4,  x5,  [x0], #16
> >> +     stp x6,  x7,  [x0], #16
> >> +     stp x8,  x9,  [x0], #16
> >> +     stp x10, x11, [x0], #16
> >> +     stp x12, x13, [x0], #16
> >> +     stp x14, x15, [x0], #16
> >> +     stp x16, x17, [x0], #16
> >> +     stp x18, x19, [x0], #16
> >> +     stp x20, x21, [x0], #16
> >> +     stp x22, x23, [x0], #16
> >> +     stp x24, x25, [x0], #16
> >> +     stp x26, x27, [x0], #16
> >> +     stp x28, x29, [x0], #16
> >> +     mov x1,  sp
> >> +     stp x30, x1,  [x0], #16 // store lr and sp
> >> +     str x30, [x0]           // store pc as lr in order to skip the call
> >> +                             //  to this function
> >
> > It might be better to word this a "store the lr in place of the pc". To
> > me at least the current wording implies the opposite of what the code
> > seems to be doing.
> Ok the last comment can be updated.

Ok, cheers.

With those changes I think this looks fine.

Thanks,
Mark.
Jean Pihet April 28, 2014, 1:10 p.m. UTC | #8
On 22 April 2014 15:42, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Jean,
>
> Apologies for the delay on this.
>
> On Tue, Mar 25, 2014 at 03:23:26PM +0000, Jean Pihet wrote:
>> Hi Mark,
>>
>> On 21 March 2014 16:11, Mark Rutland <mark.rutland@arm.com> wrote:
>> > Hi Jean,
>> >
>> > On Wed, Mar 19, 2014 at 09:42:33AM +0000, Jean Pihet wrote:
>> >> Introducing perf_regs_load function, which is going
>> >> to be used for dwarf unwind test in following patches.
>> >>
>> >> It takes single argument as a pointer to the regs dump
>> >> buffer and populates it with current registers values, as
>> >> expected by the perf built-in unwinding test.
>> >>
>> >> Signed-off-by: Jean Pihet <jean.pihet@linaro.org>
>> >> Cc: Steve Capper <steve.capper@linaro.org>
>> >> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
>> >> Cc: Frederic Weisbecker <fweisbec@gmail.com>
>> >> Cc: Ingo Molnar <mingo@kernel.org>
>> >> Cc: Namhyung Kim <namhyung@kernel.org>
>> >> Cc: Paul Mackerras <paulus@samba.org>
>> >> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
>> >> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
>> >> Cc: David Ahern <dsahern@gmail.com>
>> >> Cc: Jiri Olsa <jolsa@redhat.com>
>> >> ---
>> >>  tools/perf/arch/arm64/Makefile            |  1 +
>> >>  tools/perf/arch/arm64/include/perf_regs.h |  2 ++
>> >>  tools/perf/arch/arm64/tests/regs_load.S   | 39 +++++++++++++++++++++++++++++++
>> >>  3 files changed, 42 insertions(+)
>> >>  create mode 100644 tools/perf/arch/arm64/tests/regs_load.S
>> >>
>> >> diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
>> >> index 67e9b3d..9b8f87e 100644
>> >> --- a/tools/perf/arch/arm64/Makefile
>> >> +++ b/tools/perf/arch/arm64/Makefile
>> >> @@ -4,4 +4,5 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
>> >>  endif
>> >>  ifndef NO_LIBUNWIND
>> >>  LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
>> >> +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
>> >>  endif
>> >> diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
>> >> index 2359546..1e052f1 100644
>> >> --- a/tools/perf/arch/arm64/include/perf_regs.h
>> >> +++ b/tools/perf/arch/arm64/include/perf_regs.h
>> >> @@ -9,6 +9,8 @@
>> >>  #define PERF_REG_IP  PERF_REG_ARM64_PC
>> >>  #define PERF_REG_SP  PERF_REG_ARM64_SP
>> >>
>> >> +void perf_regs_load(u64 *regs);
>> >> +
>> >>  static inline const char *perf_reg_name(int id)
>> >>  {
>> >>       switch (id) {
>> >> diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
>> >> new file mode 100644
>> >> index 0000000..92ab968
>> >> --- /dev/null
>> >> +++ b/tools/perf/arch/arm64/tests/regs_load.S
>> >> @@ -0,0 +1,39 @@
>> >> +#include <linux/linkage.h>
>> >> +
>> >> +/*
>> >> + * Implementation of void perf_regs_load(u64 *regs);
>> >> + *
>> >> + * This functions fills in the 'regs' buffer from the actual registers values,
>> >> + * in the way the perf built-in unwinding test expects them:
>> >> + * - the PC at the time at the call to this function. Since this function
>> >> + *   is called using a bl instruction, the PC value is taken from LR,
>> >
>> > Is it guaranteed that this function is always invoked with a branch with
>> > link instruction, or is that just what current compiler versions are
>> > doing? I couldn't see where we would get that guarantee from.
>> The current compiler implements the call as a bl instruction.
>
> While I don't think we can rely on the compiler using a bl to call the
> function it shouldn't matter here if we only care about the LR value
> being an address within the caller, as it doesn't look amenable to tail
> call optimization.
>
>> > If it is called with a branch with link, then the LR value will be the
>> > PC at call time + 4, rather than just the exact PC at call time. If not
>> > then we don't have a guaranteed relationship between the PC at call time
>> > and the current LR value.
>> >
>> > If the only place that perf_regs_load is used is a single test which
>> > doesn't care about the precise PC at the time of the call, then it's
>> > probably OK to use the LR value, but we should be careful to document
>> > what the faked-up PC actually is and how we expect it to be used.
>> The code is only used by an unwinding test. The unwinding code
>> resolves the function name from an address range found in the dwarf
>> information so in principle it is ok to use the PC/LR at the time of
>> the call to a function.
>>
>> Is the comment above OK or do you want an update of the code as well?
>
> If we just need an (arbitrary) address within the caller, a comment
> update should be fine.
Yes that is the idea;

>
>> >> + * - the current SP (not touched by this function),
>> >> + * - the current value of LR is merely retrieved and stored because the
>> >> + *   value before the call to this function is unknown at this time; it will
>> >> + *   be unwound from the dwarf information in unwind__get_entries.
>> >> + */
>> >> +
>> >> +.text
>> >> +.type perf_regs_load,%function
>> >> +ENTRY(perf_regs_load)
>> >> +     stp x0,  x1,  [x0], #16 // store x0..x29
>> >> +     stp x2,  x3,  [x0], #16
>> >> +     stp x4,  x5,  [x0], #16
>> >> +     stp x6,  x7,  [x0], #16
>> >> +     stp x8,  x9,  [x0], #16
>> >> +     stp x10, x11, [x0], #16
>> >> +     stp x12, x13, [x0], #16
>> >> +     stp x14, x15, [x0], #16
>> >> +     stp x16, x17, [x0], #16
>> >> +     stp x18, x19, [x0], #16
>> >> +     stp x20, x21, [x0], #16
>> >> +     stp x22, x23, [x0], #16
>> >> +     stp x24, x25, [x0], #16
>> >> +     stp x26, x27, [x0], #16
>> >> +     stp x28, x29, [x0], #16
>> >> +     mov x1,  sp
>> >> +     stp x30, x1,  [x0], #16 // store lr and sp
>> >> +     str x30, [x0]           // store pc as lr in order to skip the call
>> >> +                             //  to this function
>> >
>> > It might be better to word this a "store the lr in place of the pc". To
>> > me at least the current wording implies the opposite of what the code
>> > seems to be doing.
>> Ok the last comment can be updated.
>
> Ok, cheers.
>
> With those changes I think this looks fine.
Ok let me send a refreshed version in a bit. If the wording is Ok I
will refresh the ARM patches for the same topic and re-submit them.

>
> Thanks,
> Mark.

Thanks,
Jean

> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
diff mbox

Patch

diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index 67e9b3d..9b8f87e 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -4,4 +4,5 @@  LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
 endif
 ifndef NO_LIBUNWIND
 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
 endif
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index 2359546..1e052f1 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -9,6 +9,8 @@ 
 #define PERF_REG_IP	PERF_REG_ARM64_PC
 #define PERF_REG_SP	PERF_REG_ARM64_SP
 
+void perf_regs_load(u64 *regs);
+
 static inline const char *perf_reg_name(int id)
 {
 	switch (id) {
diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
new file mode 100644
index 0000000..92ab968
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/regs_load.S
@@ -0,0 +1,39 @@ 
+#include <linux/linkage.h>
+
+/*
+ * Implementation of void perf_regs_load(u64 *regs);
+ *
+ * This functions fills in the 'regs' buffer from the actual registers values,
+ * in the way the perf built-in unwinding test expects them:
+ * - the PC at the time at the call to this function. Since this function
+ *   is called using a bl instruction, the PC value is taken from LR,
+ * - the current SP (not touched by this function),
+ * - the current value of LR is merely retrieved and stored because the
+ *   value before the call to this function is unknown at this time; it will
+ *   be unwound from the dwarf information in unwind__get_entries.
+ */
+
+.text
+.type perf_regs_load,%function
+ENTRY(perf_regs_load)
+	stp x0,  x1,  [x0], #16	// store x0..x29
+	stp x2,  x3,  [x0], #16
+	stp x4,  x5,  [x0], #16
+	stp x6,  x7,  [x0], #16
+	stp x8,  x9,  [x0], #16
+	stp x10, x11, [x0], #16
+	stp x12, x13, [x0], #16
+	stp x14, x15, [x0], #16
+	stp x16, x17, [x0], #16
+	stp x18, x19, [x0], #16
+	stp x20, x21, [x0], #16
+	stp x22, x23, [x0], #16
+	stp x24, x25, [x0], #16
+	stp x26, x27, [x0], #16
+	stp x28, x29, [x0], #16
+	mov x1,  sp
+	stp x30, x1,  [x0], #16	// store lr and sp
+	str x30, [x0]		// store pc as lr in order to skip the call
+				//  to this function
+	ret
+ENDPROC(perf_regs_load)