diff mbox series

[09/14] perf annotate-data: Handle call instructions

Message ID 20240202220459.527138-10-namhyung@kernel.org (mailing list archive)
State Superseded
Headers show
Series perf tools: Remaining bits of data type profiling (v5) | expand

Commit Message

Namhyung Kim Feb. 2, 2024, 10:04 p.m. UTC
When updating instruction states, the call instruction should play a
role since it can change the register states.  For simplicity, mark some
registers as scratch registers (should be arch-dependent), and
invalidate them all after a function call.

If the function returns something, the designated register (ret_reg)
will have the type info.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/annotate-data.c | 45 +++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

Comments

Ian Rogers Feb. 3, 2024, 3:09 a.m. UTC | #1
On Fri, Feb 2, 2024 at 2:05 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> When updating instruction states, the call instruction should play a
> role since it can change the register states.  For simplicity, mark some
> registers as scratch registers (should be arch-dependent), and
> invalidate them all after a function call.

nit: Volatile or caller-save would be a more conventional name than scratch.

Thanks,
Ian

> If the function returns something, the designated register (ret_reg)
> will have the type info.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/util/annotate-data.c | 45 +++++++++++++++++++++++++++++++--
>  1 file changed, 43 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
> index e46e162c783f..185cb896b9d6 100644
> --- a/tools/perf/util/annotate-data.c
> +++ b/tools/perf/util/annotate-data.c
> @@ -23,10 +23,14 @@
>  #include "symbol.h"
>  #include "symbol_conf.h"
>
> -/* Type information in a register, valid when ok is true */
> +/*
> + * Type information in a register, valid when @ok is true.
> + * The @scratch registers are invalidated after a function call.
> + */
>  struct type_state_reg {
>         Dwarf_Die type;
>         bool ok;
> +       bool scratch;
>  };
>
>  /* Type information in a stack location, dynamically allocated */
> @@ -50,6 +54,7 @@ struct type_state_stack {
>  struct type_state {
>         struct type_state_reg regs[TYPE_STATE_MAX_REGS];
>         struct list_head stack_vars;
> +       int ret_reg;
>  };
>
>  static bool has_reg_type(struct type_state *state, int reg)
> @@ -57,10 +62,23 @@ static bool has_reg_type(struct type_state *state, int reg)
>         return (unsigned)reg < ARRAY_SIZE(state->regs);
>  }
>
> -void init_type_state(struct type_state *state, struct arch *arch __maybe_unused)
> +void init_type_state(struct type_state *state, struct arch *arch)
>  {
>         memset(state, 0, sizeof(*state));
>         INIT_LIST_HEAD(&state->stack_vars);
> +
> +       if (arch__is(arch, "x86")) {
> +               state->regs[0].scratch = true;
> +               state->regs[1].scratch = true;
> +               state->regs[2].scratch = true;
> +               state->regs[4].scratch = true;
> +               state->regs[5].scratch = true;
> +               state->regs[8].scratch = true;
> +               state->regs[9].scratch = true;
> +               state->regs[10].scratch = true;
> +               state->regs[11].scratch = true;
> +               state->ret_reg = 0;
> +       }
>  }
>
>  void exit_type_state(struct type_state *state)
> @@ -417,6 +435,29 @@ void update_insn_state(struct type_state *state, struct data_loc_info *dloc,
>         int fbreg = dloc->fbreg;
>         int fboff = 0;
>
> +       if (ins__is_call(&dl->ins)) {
> +               Dwarf_Die func_die;
> +
> +               /* __fentry__ will preserve all registers */
> +               if (dl->ops.target.sym &&
> +                   !strcmp(dl->ops.target.sym->name, "__fentry__"))
> +                       return;
> +
> +               /* Otherwise invalidate scratch registers after call */
> +               for (unsigned i = 0; i < ARRAY_SIZE(state->regs); i++) {
> +                       if (state->regs[i].scratch)
> +                               state->regs[i].ok = false;
> +               }
> +
> +               /* Update register with the return type (if any) */
> +               if (die_find_realfunc(cu_die, dl->ops.target.addr, &func_die) &&
> +                   die_get_real_type(&func_die, &type_die)) {
> +                       state->regs[state->ret_reg].type = type_die;
> +                       state->regs[state->ret_reg].ok = true;
> +               }
> +               return;
> +       }
> +
>         /* FIXME: remove x86 specific code and handle more instructions like LEA */
>         if (!strstr(dl->ins.name, "mov"))
>                 return;
> --
> 2.43.0.594.gd9cf4e227d-goog
>
Namhyung Kim Feb. 6, 2024, 11:17 p.m. UTC | #2
On Fri, Feb 2, 2024 at 7:09 PM Ian Rogers <irogers@google.com> wrote:
>
> On Fri, Feb 2, 2024 at 2:05 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > When updating instruction states, the call instruction should play a
> > role since it can change the register states.  For simplicity, mark some
> > registers as scratch registers (should be arch-dependent), and
> > invalidate them all after a function call.
>
> nit: Volatile or caller-save would be a more conventional name than scratch.

'volatile' is a keyword and 'caller_saved' seems somewhat verbose.
Maybe 'temporary'?

Thanks,
Namhyung
Ian Rogers Feb. 6, 2024, 11:36 p.m. UTC | #3
On Tue, Feb 6, 2024 at 3:17 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Fri, Feb 2, 2024 at 7:09 PM Ian Rogers <irogers@google.com> wrote:
> >
> > On Fri, Feb 2, 2024 at 2:05 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > >
> > > When updating instruction states, the call instruction should play a
> > > role since it can change the register states.  For simplicity, mark some
> > > registers as scratch registers (should be arch-dependent), and
> > > invalidate them all after a function call.
> >
> > nit: Volatile or caller-save would be a more conventional name than scratch.
>
> 'volatile' is a keyword and 'caller_saved' seems somewhat verbose.
> Maybe 'temporary'?

Sgtm, perhaps temp for brevity and the documentation to call them caller save?

Thanks,
Ian

> Thanks,
> Namhyung
Namhyung Kim Feb. 7, 2024, 1:29 a.m. UTC | #4
On Tue, Feb 6, 2024 at 3:44 PM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
>
>
> On Tue, Feb 6, 2024, 8:36 PM Ian Rogers <irogers@google.com> wrote:
>>
>> On Tue, Feb 6, 2024 at 3:17 PM Namhyung Kim <namhyung@kernel.org> wrote:
>> >
>> > On Fri, Feb 2, 2024 at 7:09 PM Ian Rogers <irogers@google.com> wrote:
>> > >
>> > > On Fri, Feb 2, 2024 at 2:05 PM Namhyung Kim <namhyung@kernel.org> wrote:
>> > > >
>> > > > When updating instruction states, the call instruction should play a
>> > > > role since it can change the register states.  For simplicity, mark some
>> > > > registers as scratch registers (should be arch-dependent), and
>> > > > invalidate them all after a function call.
>> > >
>> > > nit: Volatile or caller-save would be a more conventional name than scratch.
>> >
>> > 'volatile' is a keyword and 'caller_saved' seems somewhat verbose.
>> > Maybe 'temporary'?
>>
>> Sgtm, perhaps temp for brevity and the documentation to call them caller save?
>
>
>
> "caller_saved" seems to be the conventional name doesn't look too long to use to help in reading this code by new people that have read the literature.

Ok, as you both requested, I will use "caller_saved". :)

Thanks,
Namhyung


>
> For instance, from a quick Google search:
>
> https://stackoverflow.com/questions/9268586/what-are-callee-and-caller-saved-registers
>
> - Arnaldo
diff mbox series

Patch

diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index e46e162c783f..185cb896b9d6 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -23,10 +23,14 @@ 
 #include "symbol.h"
 #include "symbol_conf.h"
 
-/* Type information in a register, valid when ok is true */
+/*
+ * Type information in a register, valid when @ok is true.
+ * The @scratch registers are invalidated after a function call.
+ */
 struct type_state_reg {
 	Dwarf_Die type;
 	bool ok;
+	bool scratch;
 };
 
 /* Type information in a stack location, dynamically allocated */
@@ -50,6 +54,7 @@  struct type_state_stack {
 struct type_state {
 	struct type_state_reg regs[TYPE_STATE_MAX_REGS];
 	struct list_head stack_vars;
+	int ret_reg;
 };
 
 static bool has_reg_type(struct type_state *state, int reg)
@@ -57,10 +62,23 @@  static bool has_reg_type(struct type_state *state, int reg)
 	return (unsigned)reg < ARRAY_SIZE(state->regs);
 }
 
-void init_type_state(struct type_state *state, struct arch *arch __maybe_unused)
+void init_type_state(struct type_state *state, struct arch *arch)
 {
 	memset(state, 0, sizeof(*state));
 	INIT_LIST_HEAD(&state->stack_vars);
+
+	if (arch__is(arch, "x86")) {
+		state->regs[0].scratch = true;
+		state->regs[1].scratch = true;
+		state->regs[2].scratch = true;
+		state->regs[4].scratch = true;
+		state->regs[5].scratch = true;
+		state->regs[8].scratch = true;
+		state->regs[9].scratch = true;
+		state->regs[10].scratch = true;
+		state->regs[11].scratch = true;
+		state->ret_reg = 0;
+	}
 }
 
 void exit_type_state(struct type_state *state)
@@ -417,6 +435,29 @@  void update_insn_state(struct type_state *state, struct data_loc_info *dloc,
 	int fbreg = dloc->fbreg;
 	int fboff = 0;
 
+	if (ins__is_call(&dl->ins)) {
+		Dwarf_Die func_die;
+
+		/* __fentry__ will preserve all registers */
+		if (dl->ops.target.sym &&
+		    !strcmp(dl->ops.target.sym->name, "__fentry__"))
+			return;
+
+		/* Otherwise invalidate scratch registers after call */
+		for (unsigned i = 0; i < ARRAY_SIZE(state->regs); i++) {
+			if (state->regs[i].scratch)
+				state->regs[i].ok = false;
+		}
+
+		/* Update register with the return type (if any) */
+		if (die_find_realfunc(cu_die, dl->ops.target.addr, &func_die) &&
+		    die_get_real_type(&func_die, &type_die)) {
+			state->regs[state->ret_reg].type = type_die;
+			state->regs[state->ret_reg].ok = true;
+		}
+		return;
+	}
+
 	/* FIXME: remove x86 specific code and handle more instructions like LEA */
 	if (!strstr(dl->ins.name, "mov"))
 		return;