2 * collectd - src/intel_pmu.c
4 * Copyright(c) 2017-2018 Intel Corporation. All rights reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Serhiy Pshyk <serhiyx.pshyk@intel.com>
26 * Kamil Wiatrowski <kamilx.wiatrowski@intel.com>
32 #include "utils_config_cores.h"
37 #define PMU_PLUGIN "intel_pmu"
/* Helper macros that build the op/result portion of perf_event_attr.config
 * for PERF_TYPE_HW_CACHE events. Per the perf_event_open(2) encoding, the
 * cache-op id occupies bits 8-15 and the cache-result id bits 16-23; the
 * cache id itself (bits 0-7) is OR-ed in at each use site below. */
39 #define HW_CACHE_READ_ACCESS \
40 (((PERF_COUNT_HW_CACHE_OP_READ) << 8) | \
41 ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))
43 #define HW_CACHE_WRITE_ACCESS \
44 (((PERF_COUNT_HW_CACHE_OP_WRITE) << 8) | \
45 ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))
47 #define HW_CACHE_PREFETCH_ACCESS \
48 (((PERF_COUNT_HW_CACHE_OP_PREFETCH) << 8) | \
49 ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))
51 #define HW_CACHE_READ_MISS \
52 (((PERF_COUNT_HW_CACHE_OP_READ) << 8) | \
53 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))
55 #define HW_CACHE_WRITE_MISS \
56 (((PERF_COUNT_HW_CACHE_OP_WRITE) << 8) | \
57 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))
59 #define HW_CACHE_PREFETCH_MISS \
60 (((PERF_COUNT_HW_CACHE_OP_PREFETCH) << 8) | \
61 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))
/* Alias for struct event_info (name/config pair for a predefined perf event;
 * the struct definition itself is not visible in this chunk). */
67 typedef struct event_info event_info_t;
/* Module-wide plugin context: config flags, path to the event-list file, the
 * configured core groups and the perf event list built during init.
 * NOTE(review): members referenced later (g_ctx.sw_events, g_ctx.hw_events)
 * and the struct's closing brace are missing from this chunk — lines appear
 * to have been dropped by the extraction; confirm against the full file. */
69 struct intel_pmu_ctx_s {
70 _Bool hw_cache_events;
71 _Bool kernel_pmu_events;
73 char event_list_fn[PATH_MAX];
75 size_t hw_events_count;
76 core_groups_list_t cores;
77 struct eventlist *event_list;
79 typedef struct intel_pmu_ctx_s intel_pmu_ctx_t;
/* Generalized kernel PMU events (PERF_TYPE_HARDWARE) added to the event list
 * when ReportKernelPMUEvents is enabled (see pmu_init).
 * NOTE(review): the array's closing "};" is missing from this chunk. */
81 event_info_t g_kernel_pmu_events[] = {
82 {.name = "cpu-cycles", .config = PERF_COUNT_HW_CPU_CYCLES},
83 {.name = "instructions", .config = PERF_COUNT_HW_INSTRUCTIONS},
84 {.name = "cache-references", .config = PERF_COUNT_HW_CACHE_REFERENCES},
85 {.name = "cache-misses", .config = PERF_COUNT_HW_CACHE_MISSES},
86 {.name = "branches", .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
87 {.name = "branch-misses", .config = PERF_COUNT_HW_BRANCH_MISSES},
88 {.name = "bus-cycles", .config = PERF_COUNT_HW_BUS_CYCLES},
/* Hardware cache events (PERF_TYPE_HW_CACHE) added when ReportHardwareCacheEvents
 * is enabled. Each .config combines a cache id (bits 0-7) with the op/result
 * bits produced by the HW_CACHE_* macros above.
 * NOTE(review): the array's closing "};" is missing from this chunk. */
91 event_info_t g_hw_cache_events[] = {
/* L1 data cache */
93 {.name = "L1-dcache-loads",
94 .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_READ_ACCESS)},
95 {.name = "L1-dcache-load-misses",
96 .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_READ_MISS)},
97 {.name = "L1-dcache-stores",
98 .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_WRITE_ACCESS)},
99 {.name = "L1-dcache-store-misses",
100 .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_WRITE_MISS)},
101 {.name = "L1-dcache-prefetches",
102 .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_PREFETCH_ACCESS)},
103 {.name = "L1-dcache-prefetch-misses",
104 .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_PREFETCH_MISS)},
/* L1 instruction cache */
106 {.name = "L1-icache-loads",
107 .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_READ_ACCESS)},
108 {.name = "L1-icache-load-misses",
109 .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_READ_MISS)},
110 {.name = "L1-icache-prefetches",
111 .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_PREFETCH_ACCESS)},
112 {.name = "L1-icache-prefetch-misses",
113 .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_PREFETCH_MISS)},
/* Last-level cache */
115 {.name = "LLC-loads",
116 .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_READ_ACCESS)},
117 {.name = "LLC-load-misses",
118 .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_READ_MISS)},
119 {.name = "LLC-stores",
120 .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_WRITE_ACCESS)},
121 {.name = "LLC-store-misses",
122 .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_WRITE_MISS)},
123 {.name = "LLC-prefetches",
124 .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_PREFETCH_ACCESS)},
125 {.name = "LLC-prefetch-misses",
126 .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_PREFETCH_MISS)},
/* Data TLB */
128 {.name = "dTLB-loads",
129 .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_READ_ACCESS)},
130 {.name = "dTLB-load-misses",
131 .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_READ_MISS)},
132 {.name = "dTLB-stores",
133 .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_WRITE_ACCESS)},
134 {.name = "dTLB-store-misses",
135 .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_WRITE_MISS)},
136 {.name = "dTLB-prefetches",
137 .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_PREFETCH_ACCESS)},
138 {.name = "dTLB-prefetch-misses",
139 .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_PREFETCH_MISS)},
/* Instruction TLB */
141 {.name = "iTLB-loads",
142 .config = (PERF_COUNT_HW_CACHE_ITLB | HW_CACHE_READ_ACCESS)},
143 {.name = "iTLB-load-misses",
144 .config = (PERF_COUNT_HW_CACHE_ITLB | HW_CACHE_READ_MISS)},
/* Branch prediction unit */
146 {.name = "branch-loads",
147 .config = (PERF_COUNT_HW_CACHE_BPU | HW_CACHE_READ_ACCESS)},
148 {.name = "branch-load-misses",
149 .config = (PERF_COUNT_HW_CACHE_BPU | HW_CACHE_READ_MISS)},
/* Software events (PERF_TYPE_SOFTWARE) added when ReportSoftwareEvents is
 * enabled (see pmu_init).
 * NOTE(review): the array's closing "};" is missing from this chunk. */
152 event_info_t g_sw_events[] = {
153 {.name = "cpu-clock", .config = PERF_COUNT_SW_CPU_CLOCK},
155 {.name = "task-clock", .config = PERF_COUNT_SW_TASK_CLOCK},
157 {.name = "context-switches", .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
159 {.name = "cpu-migrations", .config = PERF_COUNT_SW_CPU_MIGRATIONS},
161 {.name = "page-faults", .config = PERF_COUNT_SW_PAGE_FAULTS},
163 {.name = "minor-faults", .config = PERF_COUNT_SW_PAGE_FAULTS_MIN},
165 {.name = "major-faults", .config = PERF_COUNT_SW_PAGE_FAULTS_MAJ},
167 {.name = "alignment-faults", .config = PERF_COUNT_SW_ALIGNMENT_FAULTS},
169 {.name = "emulation-faults", .config = PERF_COUNT_SW_EMULATION_FAULTS},
/* Single module-wide context instance; zero-initialized at program start. */
172 static intel_pmu_ctx_t g_ctx;
/* Debug helper: log every event currently in the global event list, with its
 * group flags and perf_event_attr type/config/size. Presumably compiled only
 * under COLLECT_DEBUG (the #endif appears after pmu_dump_cgroups below).
 * NOTE(review): loop-variable declaration and closing braces are missing
 * from this chunk. */
175 static void pmu_dump_events() {
177 DEBUG(PMU_PLUGIN ": Events:");
181 for (e = g_ctx.event_list->eventlist; e; e = e->next) {
182 DEBUG(PMU_PLUGIN ": event : %s", e->event);
183 DEBUG(PMU_PLUGIN ": group_lead: %d", e->group_leader);
184 DEBUG(PMU_PLUGIN ": end_group : %d", e->end_group);
185 DEBUG(PMU_PLUGIN ": type : %#x", e->attr.type);
186 DEBUG(PMU_PLUGIN ": config : %#x", (unsigned)e->attr.config);
187 DEBUG(PMU_PLUGIN ": size : %d", e->attr.size);
/* Debug helper: log the parsed plugin configuration (event-class flags and
 * the raw HardwareEvents strings).
 * NOTE(review): closing braces are missing from this chunk. */
191 static void pmu_dump_config(void) {
193 DEBUG(PMU_PLUGIN ": Config:");
194 DEBUG(PMU_PLUGIN ": hw_cache_events : %d", g_ctx.hw_cache_events);
195 DEBUG(PMU_PLUGIN ": kernel_pmu_events : %d", g_ctx.kernel_pmu_events);
196 DEBUG(PMU_PLUGIN ": software_events : %d", g_ctx.sw_events);
198 for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
199 DEBUG(PMU_PLUGIN ": hardware_events[%zu]: %s", i, g_ctx.hw_events[i]);
/* Debug helper: log each configured core group with its description and the
 * list of core ids, formatted into a temporary heap string (4 chars per core
 * plus NUL).
 * NOTE(review): the NULL-check branch after calloc, the free() of "cores",
 * and closing braces are missing from this chunk. */
203 static void pmu_dump_cgroups(void) {
205 DEBUG(PMU_PLUGIN ": Core groups:");
207 for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
208 core_group_t *cgroup = g_ctx.cores.cgroups + i;
209 const size_t cores_size = cgroup->num_cores * 4 + 1;
210 char *cores = calloc(cores_size, sizeof(*cores));
212 DEBUG(PMU_PLUGIN ": Failed to allocate string to list cores.");
/* Append each core id; snprintf bounds the write to the remaining space. */
215 for (size_t j = 0; j < cgroup->num_cores; j++)
216 if (snprintf(cores + strlen(cores), cores_size - strlen(cores), " %d",
217 cgroup->cores[j]) < 0) {
218 DEBUG(PMU_PLUGIN ": Failed to write list of cores to string.");
223 DEBUG(PMU_PLUGIN ": group[%" PRIsz "]", i);
224 DEBUG(PMU_PLUGIN ": description: %s", cgroup->desc);
225 DEBUG(PMU_PLUGIN ": cores count: %" PRIsz, cgroup->num_cores);
226 DEBUG(PMU_PLUGIN ": cores :%s", cores);
231 #endif /* COLLECT_DEBUG */
/* Validate the configured core groups: every core index must be below
 * max_cores, and no core may appear in more than one group.
 * Returns 0 on success, presumably a negative value on error (the error
 * return statements are missing from this chunk).
 * NOTE(review): the max_cores parameter declaration and closing braces are
 * also missing here. */
233 static int pmu_validate_cgroups(core_group_t *cgroups, size_t len,
235 /* i - group index, j - core index */
236 for (size_t i = 0; i < len; i++) {
237 for (size_t j = 0; j < cgroups[i].num_cores; j++) {
238 int core = (int)cgroups[i].cores[j];
240 /* Core index cannot exceed number of cores in system,
241 note that max_cores include both online and offline CPUs. */
242 if (core >= max_cores) {
243 ERROR(PMU_PLUGIN ": Core %d is not valid, max core index: %d.", core,
248 /* Check if cores are set in remaining groups */
249 for (size_t k = i + 1; k < len; k++)
250 if (config_cores_cmp_cgroups(&cgroups[i], &cgroups[k]) != 0) {
251 ERROR(PMU_PLUGIN ": Same cores cannot be set in different groups.");
/* Parse the "HardwareEvents" config option: duplicate each string value into
 * g_ctx.hw_events and count them in g_ctx.hw_events_count. Rejects a second
 * HardwareEvents block and non-string values.
 * Returns 0 on success; error paths presumably return non-zero (the return
 * statements and closing braces are missing from this chunk). */
258 static int pmu_config_hw_events(oconfig_item_t *ci) {
260 if (strcasecmp("HardwareEvents", ci->key) != 0) {
264 if (g_ctx.hw_events) {
265 ERROR(PMU_PLUGIN ": Duplicate config for HardwareEvents.");
/* One slot per config value; unused slots stay NULL via calloc. */
269 g_ctx.hw_events = calloc(ci->values_num, sizeof(char *));
270 if (g_ctx.hw_events == NULL) {
271 ERROR(PMU_PLUGIN ": Failed to allocate hw events.");
275 for (int i = 0; i < ci->values_num; i++) {
276 if (ci->values[i].type != OCONFIG_TYPE_STRING) {
277 WARNING(PMU_PLUGIN ": The %s option requires string arguments.", ci->key);
281 g_ctx.hw_events[g_ctx.hw_events_count] = strdup(ci->values[i].value.string);
282 if (g_ctx.hw_events[g_ctx.hw_events_count] == NULL) {
283 ERROR(PMU_PLUGIN ": Failed to allocate hw events entry.");
287 g_ctx.hw_events_count++;
/* collectd complex-config callback: dispatch each child option of the plugin
 * block to its parser. Unknown keys are reported as errors.
 * Returns the last parser's status (declaration of "ret" and the final
 * return/closing braces are missing from this chunk). */
293 static int pmu_config(oconfig_item_t *ci) {
295 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
297 for (int i = 0; i < ci->children_num; i++) {
299 oconfig_item_t *child = ci->children + i;
301 if (strcasecmp("ReportHardwareCacheEvents", child->key) == 0) {
302 ret = cf_util_get_boolean(child, &g_ctx.hw_cache_events);
303 } else if (strcasecmp("ReportKernelPMUEvents", child->key) == 0) {
304 ret = cf_util_get_boolean(child, &g_ctx.kernel_pmu_events);
305 } else if (strcasecmp("EventList", child->key) == 0) {
306 ret = cf_util_get_string_buffer(child, g_ctx.event_list_fn,
307 sizeof(g_ctx.event_list_fn));
308 } else if (strcasecmp("HardwareEvents", child->key) == 0) {
309 ret = pmu_config_hw_events(child);
310 } else if (strcasecmp("ReportSoftwareEvents", child->key) == 0) {
311 ret = cf_util_get_boolean(child, &g_ctx.sw_events);
312 } else if (strcasecmp("Cores", child->key) == 0) {
313 ret = config_cores_parse(child, &g_ctx.cores);
315 ERROR(PMU_PLUGIN ": Unknown configuration parameter \"%s\".", child->key);
320 DEBUG(PMU_PLUGIN ": %s:%d ret=%d", __FUNCTION__, __LINE__, ret);
/* Dispatch one counter value to collectd: plugin "intel_pmu", plugin instance
 * = core-group description, type "counter", type instance = event name.
 * meta (may be NULL) carries scaling info attached by pmu_meta_data_create.
 * NOTE(review): the vl.values_len assignment and vl.meta hookup are missing
 * from this chunk. */
332 static void pmu_submit_counter(const char *cgroup, const char *event,
333 counter_t value, meta_data_t *meta) {
334 value_list_t vl = VALUE_LIST_INIT;
336 vl.values = &(value_t){.counter = value};
339 sstrncpy(vl.plugin, PMU_PLUGIN, sizeof(vl.plugin));
340 sstrncpy(vl.plugin_instance, cgroup, sizeof(vl.plugin_instance));
343 sstrncpy(vl.type, "counter", sizeof(vl.type));
344 sstrncpy(vl.type_instance, event, sizeof(vl.type_instance));
346 plugin_dispatch_values(&vl);
/* Create meta data describing perf counter scaling for a single event fd:
 * raw count (val[0]), time enabled (val[1]) and time running (val[2]).
 * Returns NULL when the value was not scaled (time enabled == time running,
 * or time running is zero) — no meta is needed then. Caller owns the result.
 * NOTE(review): the early "return NULL" and error-path lines are missing
 * from this chunk. */
349 meta_data_t *pmu_meta_data_create(const struct efd *efd) {
350 meta_data_t *meta = NULL;
352 /* create meta data only if value was scaled */
353 if (efd->val[1] == efd->val[2] || !efd->val[2]) {
357 meta = meta_data_create();
359 ERROR(PMU_PLUGIN ": meta_data_create failed.");
363 meta_data_add_unsigned_int(meta, "intel_pmu:raw_count", efd->val[0]);
364 meta_data_add_unsigned_int(meta, "intel_pmu:time_enabled", efd->val[1]);
365 meta_data_add_unsigned_int(meta, "intel_pmu:time_running", efd->val[2]);
/* For every event and every core group, sum the (scaled) per-core values and
 * submit one counter per (event, group). Cores whose fd is negative were not
 * opened and are skipped. Scaling meta data is attached only for
 * single-core groups, where raw/enabled/running times are unambiguous.
 * NOTE(review): declaration of "e" and several closing braces are missing
 * from this chunk. */
370 static void pmu_dispatch_data(void) {
374 for (e = g_ctx.event_list->eventlist; e; e = e->next) {
375 for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
376 core_group_t *cgroup = g_ctx.cores.cgroups + i;
377 uint64_t cgroup_value = 0;
378 int event_enabled_cgroup = 0;
379 meta_data_t *meta = NULL;
381 for (size_t j = 0; j < cgroup->num_cores; j++) {
382 int core = (int)cgroup->cores[j];
383 if (e->efd[core].fd < 0)
386 event_enabled_cgroup++;
388 /* If there are more events than counters, the kernel uses time
389 * multiplexing. With multiplexing, at the end of the run,
390 * the counter is scaled basing on total time enabled vs time running.
391 * final_count = raw_count * time_enabled/time_running
393 uint64_t value = event_scaled_value(e, core);
394 cgroup_value += value;
396 /* get meta data with information about scaling */
397 if (cgroup->num_cores == 1)
398 meta = pmu_meta_data_create(&e->efd[core]);
401 if (event_enabled_cgroup > 0) {
402 DEBUG(PMU_PLUGIN ": %s/%s = %lu", e->event, cgroup->desc, cgroup_value);
403 /* dispatch per core group value */
404 pmu_submit_counter(cgroup->desc, e->event, cgroup_value, meta);
405 meta_data_destroy(meta);
/* collectd read callback: read every opened perf event on every configured
 * core, then (presumably) dispatch the collected values via
 * pmu_dispatch_data — that call, the declarations of "ret"/"e", and the
 * closing braces are missing from this chunk. */
411 static int pmu_read(__attribute__((unused)) user_data_t *ud) {
415 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
417 /* read all events only for configured cores */
418 for (e = g_ctx.event_list->eventlist; e; e = e->next) {
419 for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
420 core_group_t *cgroup = g_ctx.cores.cgroups + i;
421 for (size_t j = 0; j < cgroup->num_cores; j++) {
422 int core = (int)cgroup->cores[j];
423 if (e->efd[core].fd < 0)
426 ret = read_event(e, core);
428 ERROR(PMU_PLUGIN ": Failed to read value of %s/%d event.", e->event,
/* Append "count" predefined events of the given perf type (hardware, hw-cache
 * or software) to event list "el". Each struct event is allocated together
 * with a trailing efd array sized for el->num_cpus.
 * Returns 0 on success, presumably non-zero on allocation failure (return
 * statements, the "e" declaration/attr.type assignment, the eventlist-head
 * hookup and closing braces are missing from this chunk). */
441 static int pmu_add_events(struct eventlist *el, uint32_t type,
442 event_info_t *events, size_t count) {
444 for (size_t i = 0; i < count; i++) {
445 /* Allocate memory for event struct that contains array of efd structs
448 calloc(sizeof(struct event) + sizeof(struct efd) * el->num_cpus, 1);
450 ERROR(PMU_PLUGIN ": Failed to allocate event structure");
455 e->attr.config = events[i].config;
456 e->attr.size = PERF_ATTR_SIZE_VER0;
/* Link the new event at the tail of the singly linked list. */
459 if (el->eventlist_last)
460 el->eventlist_last->next = e;
461 el->eventlist_last = e;
462 e->event = strdup(events[i].name);
/* Parse each HardwareEvents config string: a comma-separated list within one
 * entry forms a perf event group (first event marked group_leader, last one
 * end_group). Event names are resolved against the loaded event list via
 * resolve_event(); unresolvable names are skipped with a warning.
 * NOTE(review): allocation-failure handling, the free(events) call,
 * return statements and closing braces are missing from this chunk. Also
 * note the inner "struct event *e" (allocated at original line 484) shadows
 * the "char **e" parameter — confirm against the full file. */
468 static int pmu_add_hw_events(struct eventlist *el, char **e, size_t count) {
470 for (size_t i = 0; i < count; i++) {
472 size_t group_events_count = 0;
/* Work on a private copy: strtok_r modifies the string in place. */
474 char *events = strdup(e[i]);
478 char *s, *tmp = NULL;
479 for (s = strtok_r(events, ",", &tmp); s; s = strtok_r(NULL, ",", &tmp)) {
481 /* Allocate memory for event struct that contains array of efd structs
484 calloc(sizeof(struct event) + sizeof(struct efd) * el->num_cpus, 1);
490 if (resolve_event(s, &e->attr) != 0) {
491 WARNING(PMU_PLUGIN ": Cannot resolve %s", s);
496 /* Multiple events parsed in one entry */
497 if (group_events_count == 1) {
498 /* Mark previously added event as group leader */
499 el->eventlist_last->group_leader = 1;
505 if (el->eventlist_last)
506 el->eventlist_last->next = e;
507 el->eventlist_last = e;
508 e->event = strdup(s);
510 group_events_count++;
513 /* Multiple events parsed in one entry */
514 if (group_events_count > 1) {
515 /* Mark last added event as group end */
516 el->eventlist_last->end_group = 1;
/* Free every event in the list, walking via a saved "next" pointer so freed
 * nodes are never dereferenced, then reset the list head.
 * NOTE(review): the NULL-check on el, the loop construct, per-node free()
 * calls and closing braces are missing from this chunk. */
525 static void pmu_free_events(struct eventlist *el) {
530 struct event *e = el->eventlist;
533 struct event *next = e->next;
539 el->eventlist = NULL;
/* Open (setup_event) every event on every configured core. Failures on
 * individual cores produce a warning only; overall success presumably means
 * at least one event was opened (see the comment at original line 558).
 * NOTE(review): the measure_pid parameter, group-leader bookkeeping, return
 * statements and closing braces are missing from this chunk. */
542 static int pmu_setup_events(struct eventlist *el, bool measure_all,
544 struct event *e, *leader = NULL;
547 for (e = el->eventlist; e; e = e->next) {
549 for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
550 core_group_t *cgroup = g_ctx.cores.cgroups + i;
551 for (size_t j = 0; j < cgroup->num_cores; j++) {
552 int core = (int)cgroup->cores[j];
554 if (setup_event(e, core, leader, measure_all, measure_pid) < 0) {
555 WARNING(PMU_PLUGIN ": perf event '%s' is not available (cpu=%d).",
558 /* success if at least one event was set */
/* collectd init callback: allocate the event list, default/validate the core
 * groups, add the configured event classes (hw-cache, kernel PMU, custom
 * hardware events from the EventList file, software events), then open the
 * events for all processes on the configured cores.
 * Returns 0 on success; the trailing lines from original 669 onward appear
 * to be the error-cleanup path (free events, hw_events strings, core
 * groups). NOTE(review): "ret" declaration, several goto/return statements
 * and closing braces are missing from this chunk. */
573 static int pmu_init(void) {
576 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
578 g_ctx.event_list = alloc_eventlist();
579 if (g_ctx.event_list == NULL) {
580 ERROR(PMU_PLUGIN ": Failed to allocate event list.");
/* No "Cores" option configured: monitor every CPU individually. */
584 if (g_ctx.cores.num_cgroups == 0) {
585 ret = config_cores_default(g_ctx.event_list->num_cpus, &g_ctx.cores);
587 ERROR(PMU_PLUGIN ": Failed to set default core groups.");
591 ret = pmu_validate_cgroups(g_ctx.cores.cgroups, g_ctx.cores.num_cgroups,
592 g_ctx.event_list->num_cpus);
594 ERROR(PMU_PLUGIN ": Invalid core groups configuration.");
602 if (g_ctx.hw_cache_events) {
604 pmu_add_events(g_ctx.event_list, PERF_TYPE_HW_CACHE, g_hw_cache_events,
605 STATIC_ARRAY_SIZE(g_hw_cache_events));
607 ERROR(PMU_PLUGIN ": Failed to add hw cache events.");
612 if (g_ctx.kernel_pmu_events) {
613 ret = pmu_add_events(g_ctx.event_list, PERF_TYPE_HARDWARE,
615 STATIC_ARRAY_SIZE(g_kernel_pmu_events));
617 ERROR(PMU_PLUGIN ": Failed to add kernel PMU events.");
622 /* parse events names if config option is present and is not empty */
623 if (g_ctx.hw_events_count) {
/* Custom events need the event-list file (from jevents) to resolve names. */
625 ret = read_events(g_ctx.event_list_fn);
627 ERROR(PMU_PLUGIN ": Failed to read event list file '%s'.",
628 g_ctx.event_list_fn);
632 ret = pmu_add_hw_events(g_ctx.event_list, g_ctx.hw_events,
633 g_ctx.hw_events_count);
635 ERROR(PMU_PLUGIN ": Failed to add hardware events.");
640 if (g_ctx.sw_events) {
641 ret = pmu_add_events(g_ctx.event_list, PERF_TYPE_SOFTWARE, g_sw_events,
642 STATIC_ARRAY_SIZE(g_sw_events));
644 ERROR(PMU_PLUGIN ": Failed to add software events.");
653 if (g_ctx.event_list->eventlist != NULL) {
654 /* measure all processes */
655 ret = pmu_setup_events(g_ctx.event_list, true, -1);
657 ERROR(PMU_PLUGIN ": Failed to setup perf events for the event list.");
662 ": Events list is empty. No events were setup for monitoring.");
/* Error-cleanup path: release everything allocated so far. */
669 pmu_free_events(g_ctx.event_list);
670 sfree(g_ctx.event_list);
671 for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
672 sfree(g_ctx.hw_events[i]);
674 sfree(g_ctx.hw_events);
675 g_ctx.hw_events_count = 0;
677 config_cores_cleanup(&g_ctx.cores);
/* collectd shutdown callback: free the event list, the duplicated
 * HardwareEvents strings and the core-groups configuration. Mirrors the
 * cleanup path at the end of pmu_init.
 * NOTE(review): the final return and closing braces are missing from this
 * chunk. */
682 static int pmu_shutdown(void) {
684 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
686 pmu_free_events(g_ctx.event_list);
687 sfree(g_ctx.event_list);
688 for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
689 sfree(g_ctx.hw_events[i]);
691 sfree(g_ctx.hw_events);
692 g_ctx.hw_events_count = 0;
694 config_cores_cleanup(&g_ctx.cores);
/* Plugin entry point: register init, config, complex-read and shutdown
 * callbacks with the collectd core under the "intel_pmu" name.
 * NOTE(review): the closing brace is missing from this chunk. */
699 void module_register(void) {
700 plugin_register_init(PMU_PLUGIN, pmu_init);
701 plugin_register_complex_config(PMU_PLUGIN, pmu_config);
702 plugin_register_complex_read(NULL, PMU_PLUGIN, pmu_read, 0, NULL);
703 plugin_register_shutdown(PMU_PLUGIN, pmu_shutdown);