git.itanic.dy.fi Git - linux-stable/commitdiff
perf top: Add --branch-history option
author Adrian Hunter <adrian.hunter@intel.com>
Thu, 30 Mar 2023 13:18:32 +0000 (16:18 +0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 4 Apr 2023 12:39:56 +0000 (09:39 -0300)
Add a --branch-history option to perf top that behaves the same as the
option of the same name in perf report.

Example:

  $ cat tcallf.c
  volatile a = 10000, b = 100000, c;

  __attribute__((noinline)) f2()
  {
          c = a / b;
  }

  __attribute__((noinline)) f1()
  {
          f2();
          f2();
  }
  main()
  {
          while (1)
                  f1();
  }
  $ gcc -w -g -o tcallf tcallf.c
  $ ./tcallf &
  [1] 29409
  $ perf top -e cycles:u  -t $(pidof tcallf) --stdio --no-children --branch-history
     PerfTop:    3819 irqs/sec  kernel: 0.0%  exact:  0.0% lost: 0/0 drop: 0/0 [4000Hz cycles:u],  (target_tid: 29409)
  --------------------------------------------------------------------------------------------------------------------

      49.01%  tcallf.c:5   [.] f2    tcallf
              |
              |--24.91%--f2 tcallf.c:4
              |          |
              |          |--17.14%--f1 tcallf.c:11 (cycles:1)
              |          |          f1 tcallf.c:11
              |          |          f2 tcallf.c:6 (cycles:3)
              |          |          f2 tcallf.c:4
              |          |          f1 tcallf.c:10 (cycles:2)
              |          |          f1 tcallf.c:9
              |          |          main tcallf.c:16 (cycles:1)
              |          |          main tcallf.c:16
              |          |          main tcallf.c:16 (cycles:1)
              |          |          main tcallf.c:16
              |          |          f1 tcallf.c:12 (cycles:1)
              |          |          f1 tcallf.c:12
              |          |          f2 tcallf.c:6 (cycles:3)
              |          |          f2 tcallf.c:4
              |          |          f1 tcallf.c:11 (cycles:1 iter:1 avg_cycles:12)
              |          |          f1 tcallf.c:11
              |          |          f2 tcallf.c:6 (cycles:3 iter:1 avg_cycles:12)
              |          |          f2 tcallf.c:4
              |          |          f1 tcallf.c:10 (cycles:2 iter:1 avg_cycles:12)
              |          |
              |           --7.78%--f1 tcallf.c:10 (cycles:2)
              |                     f1 tcallf.c:9
              |                     main tcallf.c:16 (cycles:1)
              |                     main tcallf.c:16
              |                     main tcallf.c:16 (cycles:1)
              |                     main tcallf.c:16
              |                     f1 tcallf.c:12 (cycles:1)
              |                     f1 tcallf.c:12
              |                     f2 tcallf.c:6 (cycles:3)
              |                     f2 tcallf.c:4
              |                     f1 tcallf.c:11 (cycles:1)
              |                     f1 tcallf.c:11
              |                     f2 tcallf.c:6 (cycles:3)
              |                     f2 tcallf.c:4
              |                     f1 tcallf.c:10 (cycles:2 iter:1 avg_cycles:12)
              |                     f1 tcallf.c:9
              |                     main tcallf.c:16 (cycles:1 iter:1 avg_cycles:12)
              |                     main tcallf.c:16
              |                     main tcallf.c:16 (cycles:1 iter:1 avg_cycles:12)
  ...

  $ pkill tcallf
  [1]+  Terminated              ./tcallf

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20230330131833.12864-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-top.txt
tools/perf/builtin-top.c

index 619cc8143ad522539348227c2515c7b289c0e154..3c202ec080ba2817dcb643e8d3941fe46068c1d7 100644 (file)
@@ -254,6 +254,10 @@ Default is to monitor all CPUS.
        The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
        Note that this feature may not be available on all processors.
 
+--branch-history::
+       Add the addresses of sampled taken branches to the callstack.
+       This allows examining the path the program took to each sample.
+
 --raw-trace::
        When displaying traceevent output, do not use print fmt or plugins.
 
index 82c6c065830dc562da73dbb51d8f830a8d50186f..2c985cfea517988a4569b1e6d70d52cb3fb2b697 100644 (file)
@@ -1437,6 +1437,7 @@ int cmd_top(int argc, const char **argv)
                .max_stack           = sysctl__max_stack(),
                .nr_threads_synthesize = UINT_MAX,
        };
+       bool branch_call_mode = false;
        struct record_opts *opts = &top.record_opts;
        struct target *target = &opts->target;
        const char *disassembler_style = NULL, *objdump_path = NULL, *addr2line_path = NULL;
@@ -1551,6 +1552,8 @@ int cmd_top(int argc, const char **argv)
        OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
+       OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
+                   "add last branch records to call history"),
        OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
                    "Show raw trace event output (do not use print fmt or plugins)"),
        OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
@@ -1677,6 +1680,20 @@ int cmd_top(int argc, const char **argv)
                goto out_delete_evlist;
        }
 
+       if (branch_call_mode) {
+               if (!opts->branch_stack)
+                       opts->branch_stack = PERF_SAMPLE_BRANCH_ANY;
+               symbol_conf.use_callchain = true;
+               callchain_param.key = CCKEY_ADDRESS;
+               callchain_param.branch_callstack = true;
+               callchain_param.enabled = true;
+               if (callchain_param.record_mode == CALLCHAIN_NONE)
+                       callchain_param.record_mode = CALLCHAIN_FP;
+               callchain_register_param(&callchain_param);
+               if (!sort_order)
+                       sort_order = "srcline,symbol,dso";
+       }
+
        if (opts->branch_stack && callchain_param.enabled)
                symbol_conf.show_branchflag_count = true;