// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2021 Facebook
// Copyright (c) 2021 Google
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

#define MAX_LEVELS  10  // max cgroup hierarchy level: arbitrary
#define MAX_EVENTS  32  // max events per cgroup: arbitrary

// NOTE: many of the maps and the global data will be modified before
//       loading from user space (the perf tool) using the skeleton helpers.

// single set of global perf events to measure
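// (one fd per event and per cpu, indexed by event_idx * num_cpus + cpu)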
struct {
        __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(int));
        __uint(max_entries, 1);
} events SEC(".maps");

// maps a cgroup id to its cgroup index (used to locate the cgroup's slots
// in the cgrp_readings map)
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u64));
        __uint(value_size, sizeof(__u32));
        __uint(max_entries, 1);
} cgrp_idx SEC(".maps");

// per-cpu event snapshots to calculate delta
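// (max_entries is omitted here and in cgrp_readings; the perf tool sets it
//  before loading, see the NOTE above)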
struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(struct bpf_perf_event_value));
} prev_readings SEC(".maps");

// aggregated event values for each cgroup (per-cpu)
// will be read from user space
struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(struct bpf_perf_event_value));
} cgrp_readings SEC(".maps");

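// set to their real values by the perf tool (via the skeleton) before loading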
const volatile __u32 num_events = 1;
const volatile __u32 num_cpus = 1;

int enabled = 0;
int use_cgroup_v2 = 0;

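// Walk the current task's perf_event cgroup (v1) hierarchy and collect the
// cgrp_idx map index of every ancestor cgroup that user space registered.
// Returns the number of indexes written to cgrps[].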
static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
{
        struct task_struct *p = (void *)bpf_get_current_task();
        struct cgroup *cgrp;
        register int i = 0;
        __u32 *elem;
        int level;
        int cnt;

        cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup);
        level = BPF_CORE_READ(cgrp, level);

        for (cnt = 0; i < MAX_LEVELS; i++) {
                __u64 cgrp_id;

                if (i > level)
                        break;

                // convert cgroup-id to a map index
                cgrp_id = BPF_CORE_READ(cgrp, ancestor_ids[i]);
                elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
                if (!elem)
                        continue;

                cgrps[cnt++] = *elem;
                if (cnt == size)
                        break;
        }

        return cnt;
}

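// Same as above, but for cgroup v2: with the unified hierarchy, the
// bpf_get_current_ancestor_cgroup_id() helper can be used directly.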
static inline int get_cgroup_v2_idx(__u32 *cgrps, int size)
{
        register int i = 0;
        __u32 *elem;
        int cnt;

        for (cnt = 0; i < MAX_LEVELS; i++) {
                __u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i);

                if (cgrp_id == 0)
                        break;

                // convert cgroup-id to a map index
                elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
                if (!elem)
                        continue;

                cgrps[cnt++] = *elem;
                if (cnt == size)
                        break;
        }

        return cnt;
}

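// Read the current per-cpu value of each perf event, compute the delta
// against the previous snapshot, and add the delta to the readings of
// every tracked ancestor cgroup of the current task.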
static int bperf_cgroup_count(void)
{
        register __u32 idx = 0;  // to have it in a register to pass BPF verifier
        register int c = 0;
        struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val;
        __u32 cpu = bpf_get_smp_processor_id();
        __u32 cgrp_idx[MAX_LEVELS];
        int cgrp_cnt;
        __u32 key, cgrp;
        long err;

        if (use_cgroup_v2)
                cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS);
        else
                cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS);

        for ( ; idx < MAX_EVENTS; idx++) {
                if (idx == num_events)
                        break;

                // XXX: do not pass idx directly (for verifier)
                key = idx;
                // this is per-cpu array for diff
                prev_val = bpf_map_lookup_elem(&prev_readings, &key);
                if (!prev_val) {
                        val.counter = val.enabled = val.running = 0;
                        bpf_map_update_elem(&prev_readings, &key, &val, BPF_ANY);

                        prev_val = bpf_map_lookup_elem(&prev_readings, &key);
                        if (!prev_val)
                                continue;
                }

                // read from global perf_event array
                key = idx * num_cpus + cpu;
                err = bpf_perf_event_read_value(&events, key, &val, sizeof(val));
                if (err)
                        continue;

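                // accumulate deltas only while counting is enabled; the
                // snapshot is still updated below, so activity from the
                // disabled period is discarded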
                if (enabled) {
                        delta.counter = val.counter - prev_val->counter;
                        delta.enabled = val.enabled - prev_val->enabled;
                        delta.running = val.running - prev_val->running;

                        for (c = 0; c < MAX_LEVELS; c++) {
                                if (c == cgrp_cnt)
                                        break;

                                cgrp = cgrp_idx[c];

                                // aggregate the result by cgroup
                                key = cgrp * num_events + idx;
                                cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key);
                                if (cgrp_val) {
                                        cgrp_val->counter += delta.counter;
                                        cgrp_val->enabled += delta.enabled;
                                        cgrp_val->running += delta.running;
                                } else {
                                        bpf_map_update_elem(&cgrp_readings, &key,
                                                            &delta, BPF_ANY);
                                }
                        }
                }

                *prev_val = val;
        }
        return 0;
}

// This will be attached to the cgroup-switches perf event on each cpu
SEC("perf_event")
int BPF_PROG(on_cgrp_switch)
{
        return bperf_cgroup_count();
}

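// Run by the perf tool on each cpu (e.g. via BPF_PROG_TEST_RUN) to flush
// the latest deltas into cgrp_readings right before reading that map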
185 SEC("raw_tp/sched_switch")
186 int BPF_PROG(trigger_read)
187 {
188         return bperf_cgroup_count();
189 }
190
191 char LICENSE[] SEC("license") = "Dual BSD/GPL";