2 * collectd - src/intel_rdt.c
4 * Copyright(c) 2016-2018 Intel Corporation. All rights reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Serhiy Pshyk <serhiyx.pshyk@intel.com>
26 * Starzyk, Mateusz <mateuszx.starzyk@intel.com>
27 * Wojciech Andralojc <wojciechx.andralojc@intel.com>
31 #include "utils/common/common.h"
32 #include "utils/config_cores/config_cores.h"
33 #include "utils_proc_pids.h"
/* Compile-time names and limits of the plugin.
 * RDT_PLUGIN is the log prefix and the value copied into vl.plugin.
 * RDT_MAX_CORES is derived as RDT_MAX_SOCKET_CORES * RDT_MAX_SOCKETS and
 * sizes the per-core-group arrays; RDT_MAX_NAMES_GROUPS sizes the
 * process-name group arrays; RDT_PROC_PATH is the path handed to
 * fetch_pids_for_procs(). */
36 #define RDT_PLUGIN "intel_rdt"
38 /* libpqos v2.0 or newer is required for process monitoring*/
40 #if defined(PQOS_VERSION) && PQOS_VERSION >= 20000
44 #define RDT_PLUGIN "intel_rdt"
46 #define RDT_MAX_SOCKETS 8
47 #define RDT_MAX_SOCKET_CORES 64
48 #define RDT_MAX_CORES (RDT_MAX_SOCKET_CORES * RDT_MAX_SOCKETS)
52 * Process name inside comm file is limited to 16 chars.
53 * More info here: http://man7.org/linux/man-pages/man5/proc.5.html
55 #define RDT_MAX_NAMES_GROUPS 64
56 #define RDT_PROC_PATH "/proc"
/* State of one process-names group: the PID table built for the group's
 * names, the number of PIDs currently monitored and the event mask used
 * when monitoring is started.
 * NOTE(review): other members used elsewhere in this file (desc, names,
 * num_names) are not visible in this excerpt. */
65 struct rdt_name_group_s {
69 proc_pids_t *proc_pids_array;
70 size_t monitored_pids_count;
71 enum pqos_mon_event events;
73 typedef struct rdt_name_group_s rdt_name_group_t;
/* Members of the plugin context (struct rdt_ctx_s):
 *  - cores / events / pcgroups: configured core groups, the event mask per
 *    group and the per-group monitoring data passed to libpqos;
 *  - ngroups / pngroups: process-name groups and their monitoring data;
 *  - pqos_cpu / pqos_cap / cap_mon: pointers filled in by pqos_cap_get()
 *    and pqos_cap_get_type() in rdt_preinit().
 * NOTE(review): the struct opening line and the num_ngroups member used
 * elsewhere in the file are not visible in this excerpt. */
77 core_groups_list_t cores;
78 enum pqos_mon_event events[RDT_MAX_CORES];
79 struct pqos_mon_data *pcgroups[RDT_MAX_CORES];
81 rdt_name_group_t ngroups[RDT_MAX_NAMES_GROUPS];
82 struct pqos_mon_data *pngroups[RDT_MAX_NAMES_GROUPS];
85 const struct pqos_cpuinfo *pqos_cpu;
86 const struct pqos_cap *pqos_cap;
87 const struct pqos_capability *cap_mon;
89 typedef struct rdt_ctx_s rdt_ctx_t;
/* Plugin context; allocated with calloc() in rdt_preinit(). */
91 static rdt_ctx_t *g_rdt;
/* Configuration state; set to CONFIGURATION_ERROR by rdt_config() when
 * pre-initialization or parsing of a config option fails. */
93 static rdt_config_status g_state = UNKNOWN;
/* libpqos interface in use; -1 until rdt_preinit() stores pqos.interface. */
95 static int g_interface = -1;
/* Dispatches a single derive value to collectd.
 *
 * `cgroup'        Group description; written to vl.plugin_instance.
 * `type'          collectd type name; copied to vl.type.
 * `type_instance' Copied to vl.type_instance.
 * `value'         Derive value to submit.
 *
 * vl.plugin is always RDT_PLUGIN. */
97 static void rdt_submit_derive(const char *cgroup, const char *type,
98 const char *type_instance, derive_t value) {
99 value_list_t vl = VALUE_LIST_INIT;
101 vl.values = &(value_t){.derive = value};
104 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
105 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
106 sstrncpy(vl.type, type, sizeof(vl.type));
108 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
110 plugin_dispatch_values(&vl);
/* Dispatches a single gauge value to collectd.
 *
 * `cgroup'        Group description; written to vl.plugin_instance.
 * `type'          collectd type name; copied to vl.type.
 * `type_instance' Copied to vl.type_instance.
 * `value'         Gauge value to submit.
 *
 * vl.plugin is always RDT_PLUGIN.
 * NOTE(review): the "ipc" submissions in this file pass NULL as
 * type_instance; any guard around the type_instance copy is not visible in
 * this excerpt - confirm. */
113 static void rdt_submit_gauge(const char *cgroup, const char *type,
114 const char *type_instance, gauge_t value) {
115 value_list_t vl = VALUE_LIST_INIT;
117 vl.values = &(value_t){.gauge = value};
120 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
121 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
122 sstrncpy(vl.type, type, sizeof(vl.type));
124 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
126 plugin_dispatch_values(&vl);
/* Debug helper: logs the number of configured core groups and, for each
 * group, its index, description, the list of its cores (formatted into a
 * local buffer, one " %d" entry per core) and its event mask. */
130 static void rdt_dump_cgroups(void) {
131 char cores[RDT_MAX_CORES * 4];
136 DEBUG(RDT_PLUGIN ": Core Groups Dump");
137 DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->cores.num_cgroups);
139 for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) {
140 core_group_t *cgroup = g_rdt->cores.cgroups + i;
142 memset(cores, 0, sizeof(cores));
143 for (size_t j = 0; j < cgroup->num_cores; j++) {
144 snprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d",
148 DEBUG(RDT_PLUGIN ": group[%zu]:", i);
149 DEBUG(RDT_PLUGIN ": description: %s", cgroup->desc);
150 DEBUG(RDT_PLUGIN ": cores: %s", cores);
151 DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->events[i]);
/* Debug helper: logs the number of configured process-name groups and, for
 * each group, its index, description, the space-separated list of its
 * process names (built in a DATA_MAX_NAME_LEN buffer, so long lists are
 * truncated by snprintf) and its event mask. */
158 static void rdt_dump_ngroups(void) {
160 char names[DATA_MAX_NAME_LEN];
165 DEBUG(RDT_PLUGIN ": Process Names Groups Dump");
166 DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->num_ngroups);
168 for (size_t i = 0; i < g_rdt->num_ngroups; i++) {
169 memset(names, 0, sizeof(names));
170 for (size_t j = 0; j < g_rdt->ngroups[i].num_names; j++)
171 snprintf(names + strlen(names), sizeof(names) - strlen(names) - 1, " %s",
172 g_rdt->ngroups[i].names[j]);
174 DEBUG(RDT_PLUGIN ": group[%d]:", (int)i);
175 DEBUG(RDT_PLUGIN ": description: %s", g_rdt->ngroups[i].desc);
176 DEBUG(RDT_PLUGIN ": process names:%s", names);
177 DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->ngroups[i].events);
182 #endif /* LIBPQOS2 */
/* Converts a byte count to kibibytes (divides by 1024.0). */
184 static inline double bytes_to_kb(const double bytes) { return bytes / 1024.0; }
/* Converts a byte count to mebibytes (divides by 1024.0 * 1024.0). */
186 static inline double bytes_to_mb(const double bytes) {
187 return bytes / (1024.0 * 1024.0);
/* Debug helper: logs a table with one row per core group holding the LLC
 * occupancy in KB and the local/remote memory bandwidth deltas in MB, read
 * from g_rdt->pcgroups[i]->values. When g_interface is
 * PQOS_INTER_OS_RESCTRL_MON the RMID column is left out; the other visible
 * variants also print poll_ctx[0].rmid.
 * NOTE(review): the preprocessor conditionals selecting between the
 * variants are not visible in this excerpt. */
190 static void rdt_dump_cores_data(void) {
192 * CORE - monitored group of cores
193 * RMID - Resource Monitoring ID associated with the monitored group
194 * This is not available for monitoring with resource control
195 * LLC - last level cache occupancy
196 * MBL - local memory bandwidth
197 * MBR - remote memory bandwidth
200 if (g_interface == PQOS_INTER_OS_RESCTRL_MON) {
201 DEBUG(RDT_PLUGIN ": CORE LLC[KB] MBL[MB] MBR[MB]");
203 DEBUG(RDT_PLUGIN ": CORE RMID LLC[KB] MBL[MB] MBR[MB]");
206 DEBUG(RDT_PLUGIN ": CORE RMID LLC[KB] MBL[MB] MBR[MB]");
207 #endif /* LIBPQOS2 */
209 for (int i = 0; i < g_rdt->cores.num_cgroups; i++) {
210 const struct pqos_event_values *pv = &g_rdt->pcgroups[i]->values;
212 double llc = bytes_to_kb(pv->llc);
213 double mbr = bytes_to_mb(pv->mbm_remote_delta);
214 double mbl = bytes_to_mb(pv->mbm_local_delta);
216 if (g_interface == PQOS_INTER_OS_RESCTRL_MON) {
217 DEBUG(RDT_PLUGIN ": [%s] %10.1f %10.1f %10.1f",
218 g_rdt->cores.cgroups[i].desc, llc, mbl, mbr);
220 DEBUG(RDT_PLUGIN ": [%s] %8u %10.1f %10.1f %10.1f",
221 g_rdt->cores.cgroups[i].desc, g_rdt->pcgroups[i]->poll_ctx[0].rmid,
225 DEBUG(RDT_PLUGIN ": [%s] %8u %10.1f %10.1f %10.1f",
226 g_rdt->cores.cgroups[i].desc, g_rdt->pcgroups[i]->poll_ctx[0].rmid,
228 #endif /* LIBPQOS2 */
/* Debug helper for process-name groups. First logs, per group, the PIDs
 * found in the group's proc_pids_array lists (formatted into a
 * DATA_MAX_NAME_LEN buffer); then logs, per group, the LLC occupancy in KB
 * and the local/remote memory bandwidth deltas in MB taken from
 * g_rdt->pngroups[i]->values. */
233 static void rdt_dump_pids_data(void) {
235 * NAME - monitored group of processes
236 * PIDs - list of PID numbers in the NAME group
237 * LLC - last level cache occupancy
238 * MBL - local memory bandwidth
239 * MBR - remote memory bandwidth
242 DEBUG(RDT_PLUGIN ": NAME PIDs");
243 char pids[DATA_MAX_NAME_LEN];
244 for (size_t i = 0; i < g_rdt->num_ngroups; ++i) {
245 memset(pids, 0, sizeof(pids));
246 for (size_t j = 0; j < g_rdt->ngroups[i].num_names; ++j) {
247 pids_list_t *list = g_rdt->ngroups[i].proc_pids_array[j].pids;
248 while (list != NULL) {
249 snprintf(pids + strlen(pids), sizeof(pids) - strlen(pids) - 1, " %u",
254 DEBUG(RDT_PLUGIN ": [%s] %s", g_rdt->ngroups[i].desc, pids);
257 DEBUG(RDT_PLUGIN ": NAME LLC[KB] MBL[MB] MBR[MB]");
258 for (size_t i = 0; i < g_rdt->num_ngroups; i++) {
260 const struct pqos_event_values *pv = &g_rdt->pngroups[i]->values;
262 double llc = bytes_to_kb(pv->llc);
263 double mbr = bytes_to_mb(pv->mbm_remote_delta);
264 double mbl = bytes_to_mb(pv->mbm_local_delta);
266 DEBUG(RDT_PLUGIN ": [%s] %10.1f %10.1f %10.1f", g_rdt->ngroups[i].desc,
270 #endif /* LIBPQOS2 */
271 #endif /* COLLECT_DEBUG */
/* Searches the first `size' entries of `names' for one that equals `name'
 * when compared over at most MAX_PROC_NAME_LEN characters.
 * The caller in strlisttoarray() treats a non-zero result as "duplicate";
 * the return statements themselves are not visible in this excerpt. */
274 static int isdupstr(const char *names[], const size_t size, const char *name) {
275 for (size_t i = 0; i < size; i++)
276 if (strncmp(names[i], name, (size_t)MAX_PROC_NAME_LEN) == 0)
287 * Converts string representing list of strings into array of strings.
289 * name,name1,name2,name3
292 * `str_list' String representing list of strings.
293 * `names' Array to put extracted strings into.
294 * `names_num' Variable to put number of extracted strings.
297 * Number of elements placed into names.
/* Splits the comma-separated `str_list' in place (strtok_r) and appends the
 * tokens to the string array `*names', updating `*names_num'.
 * Visible checks: NULL arguments, lists containing ",," (empty name),
 * leading whitespace in a token, duplicated names (isdupstr) and
 * allocation failure in strarray_add(). */
299 static int strlisttoarray(char *str_list, char ***names, size_t *names_num) {
300 char *saveptr = NULL;
302 if (str_list == NULL || names == NULL)
305 if (strstr(str_list, ",,")) {
306 /* strtok ignores empty words between separators.
307 * This condition handles that by rejecting strings
308 * with consecutive separators */
309 ERROR(RDT_PLUGIN ": Empty process name");
314 char *token = strtok_r(str_list, ",", &saveptr);
320 while (isspace(*token))
326 if ((isdupstr((const char **)*names, *names_num, token))) {
327 ERROR(RDT_PLUGIN ": Duplicated process name \'%s\' in group \'%s\'",
331 if (0 != strarray_add(names, names_num, token)) {
332 ERROR(RDT_PLUGIN ": Error allocating process name string");
346 * Function to compare names in two name groups.
349 * `ng_a' Pointer to name group a.
350 * `ng_b' Pointer to name group b.
353 * 1 if both groups contain the same names
354 * 0 if none of their names match
355 * -1 if some but not all names match
/* Compares the name sets of two process-name groups. Every name of `ng_a'
 * is compared against every name of `ng_b' over at most MAX_PROC_NAME_LEN
 * characters; the number of matches is then compared with both group sizes
 * to tell "identical" from "partially overlapping" from "disjoint".
 * Both pointers must be non-NULL (asserted). */
357 static int ngroup_cmp(const rdt_name_group_t *ng_a,
358 const rdt_name_group_t *ng_b) {
361 assert(ng_a != NULL);
362 assert(ng_b != NULL);
364 const size_t sz_a = (unsigned)ng_a->num_names;
365 const size_t sz_b = (unsigned)ng_b->num_names;
366 const char **tab_a = (const char **)ng_a->names;
367 const char **tab_b = (const char **)ng_b->names;
369 for (size_t i = 0; i < sz_a; i++) {
370 for (size_t j = 0; j < sz_b; j++)
371 if (strncmp(tab_a[i], tab_b[j], (size_t)MAX_PROC_NAME_LEN) == 0)
374 /* if no names are the same */
377 /* if group contains same names */
378 if (sz_a == sz_b && sz_b == (size_t)found)
380 /* if not all names are the same */
389 * Function to set the descriptions and names for each process names group.
390 * Takes a config option containing list of strings that are used to set
391 * process group values.
394 * `item' Config option containing process names groups.
395 * `groups' Table of process name groups to set values in.
396 * `max_groups' Maximum number of process name groups allowed.
399 * On success, the number of name groups set up. On error, appropriate
400 * negative error value.
/* Fills `groups' from the string values of config option `item'.
 * For each value: rejects NULL/empty strings, copies the value into a
 * DATA_MAX_NAME_LEN buffer (so longer values are truncated before
 * parsing), splits it into names with strlisttoarray(), stores a duplicate
 * of the original value as the group description and resets the group's
 * PID table and monitored-PID counter. A warning is logged once the group
 * index reaches `max_groups'.
 * All three arguments are asserted to be valid. */
402 static int oconfig_to_ngroups(const oconfig_item_t *item,
403 rdt_name_group_t *groups,
404 const size_t max_groups) {
407 assert(groups != NULL);
408 assert(max_groups > 0);
409 assert(item != NULL);
411 for (int j = 0; j < item->values_num; j++) {
413 char value[DATA_MAX_NAME_LEN];
415 if ((item->values[j].value.string == NULL) ||
416 (strlen(item->values[j].value.string) == 0)) {
417 ERROR(RDT_PLUGIN ": Error - empty group");
421 sstrncpy(value, item->values[j].value.string, sizeof(value));
423 ret = strlisttoarray(value, &groups[index].names, &groups[index].num_names);
424 if (ret != 0 || groups[index].num_names == 0) {
425 ERROR(RDT_PLUGIN ": Error parsing process names group (%s)",
426 item->values[j].value.string);
430 /* set group description info */
431 groups[index].desc = sstrdup(item->values[j].value.string);
432 if (groups[index].desc == NULL) {
433 ERROR(RDT_PLUGIN ": Error allocating name group description");
437 groups[index].proc_pids_array = NULL;
438 groups[index].monitored_pids_count = 0;
442 if (index >= (const int)max_groups) {
443 WARNING(RDT_PLUGIN ": Too many process names groups configured");
/* Releases everything owned by the process-name groups of `rdt'.
 * Walks all RDT_MAX_NAMES_GROUPS slots (not only the configured ones) and
 * frees the description, the names array, every PID list plus the
 * proc_pids_array itself, and the pngroups[i] monitoring data; num_names
 * is reset to 0. */
451 static void rdt_free_ngroups(rdt_ctx_t *rdt) {
452 for (int i = 0; i < RDT_MAX_NAMES_GROUPS; i++) {
453 if (rdt->ngroups[i].desc)
454 DEBUG(RDT_PLUGIN ": Freeing pids \'%s\' group\'s data...",
455 rdt->ngroups[i].desc);
456 sfree(rdt->ngroups[i].desc);
457 strarray_free(rdt->ngroups[i].names, rdt->ngroups[i].num_names);
459 if (rdt->ngroups[i].proc_pids_array) {
460 for (size_t j = 0; j < rdt->ngroups[i].num_names; ++j) {
461 if (NULL == rdt->ngroups[i].proc_pids_array[j].pids)
463 pids_list_free(rdt->ngroups[i].proc_pids_array[j].pids);
466 sfree(rdt->ngroups[i].proc_pids_array);
468 rdt->ngroups[i].num_names = 0;
469 sfree(rdt->pngroups[i]);
/* Parses and validates the process-name groups given in config option
 * `item' and stores them in `rdt'.
 * Steps visible here: every value must be a string; the values are parsed
 * with oconfig_to_ngroups(); each name is checked with
 * is_proc_name_valid(); the event mask is the union of all events reported
 * by the monitoring capability minus PQOS_PERF_EVENT_LLC_MISS; each group
 * is compared with all earlier groups via ngroup_cmp() to reject names
 * monitored in more than one group; finally one zeroed monitoring-data
 * struct is allocated per group. The visible failure paths release the
 * groups with rdt_free_ngroups(). */
473 static int rdt_config_ngroups(rdt_ctx_t *rdt, const oconfig_item_t *item) {
475 enum pqos_mon_event events = 0;
478 DEBUG(RDT_PLUGIN ": ngroups_config: Invalid argument.");
482 DEBUG(RDT_PLUGIN ": Process names groups [%d]:", item->values_num);
483 for (int j = 0; j < item->values_num; j++) {
484 if (item->values[j].type != OCONFIG_TYPE_STRING) {
486 ": given process names group value is not a string [idx=%d]",
490 DEBUG(RDT_PLUGIN ": [%d]: %s", j, item->values[j].value.string);
493 n = oconfig_to_ngroups(item, rdt->ngroups, RDT_MAX_NAMES_GROUPS);
495 rdt_free_ngroups(rdt);
496 ERROR(RDT_PLUGIN ": Error parsing process name groups configuration.");
500 /* validate configured process name values */
501 for (int group_idx = 0; group_idx < n; group_idx++) {
502 DEBUG(RDT_PLUGIN ": checking group [%d]: %s", group_idx,
503 rdt->ngroups[group_idx].desc);
504 for (size_t name_idx = 0; name_idx < rdt->ngroups[group_idx].num_names;
506 DEBUG(RDT_PLUGIN ": checking process name [%zu]: %s", name_idx,
507 rdt->ngroups[group_idx].names[name_idx]);
508 if (!is_proc_name_valid(rdt->ngroups[group_idx].names[name_idx])) {
509 ERROR(RDT_PLUGIN ": Process name group '%s' contains invalid name '%s'",
510 rdt->ngroups[group_idx].desc,
511 rdt->ngroups[group_idx].names[name_idx]);
512 rdt_free_ngroups(rdt);
519 ERROR(RDT_PLUGIN ": Empty process name groups configured.");
523 /* Get all available events on this platform */
524 for (unsigned i = 0; i < rdt->cap_mon->u.mon->num_events; i++)
525 events |= rdt->cap_mon->u.mon->events[i].type;
527 events &= ~(PQOS_PERF_EVENT_LLC_MISS);
529 DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events);
531 rdt->num_ngroups = n;
532 for (int i = 0; i < n; i++) {
533 for (int j = 0; j < i; j++) {
534 int found = ngroup_cmp(&rdt->ngroups[j], &rdt->ngroups[i]);
536 rdt_free_ngroups(rdt);
538 ": Cannot monitor same process name in different groups.");
543 rdt->ngroups[i].events = events;
544 rdt->pngroups[i] = calloc(1, sizeof(*rdt->pngroups[i]));
545 if (rdt->pngroups[i] == NULL) {
546 rdt_free_ngroups(rdt);
548 ": Failed to allocate memory for process name monitoring data.");
/* Synchronizes libpqos PID monitoring of one process-names group with the
 * processes currently present under RDT_PROC_PATH.
 *
 * `ngroup'         Group to refresh; its proc_pids_array must already be
 *                  initialized (a NULL array is reported as an error).
 * `group_mon_data' libpqos monitoring data belonging to the group.
 *
 * Flow visible in this excerpt:
 *  1. fetch_pids_for_procs() builds a fresh PID table for the group's names.
 *  2. pids_list_diff() is run per name against the previous table,
 *     producing lists/counts of new and lost PIDs.
 *  3. New PIDs: pqos_mon_start_pids() when nothing is monitored yet,
 *     pqos_mon_add_pids() otherwise; monitored_pids_count is updated on
 *     success.
 *  4. Lost PIDs: pqos_mon_stop() when all monitored PIDs are gone,
 *     pqos_mon_remove_pids() otherwise.
 *  5. If anything changed, the fresh table replaces the stored one.
 *  6. libpqos failures jump to pqos_error_recovery; the recovery code stops
 *     monitoring, frees and re-initializes the PID table and resets
 *     monitored_pids_count so monitoring restarts on a later read.
 *  7. Cleanup frees whichever PID table was not kept and the temporary
 *     lists and arrays.
 * NOTE(review): labels, return statements and some branches are not
 * visible in this excerpt. */
556 static int rdt_refresh_ngroup(rdt_name_group_t *ngroup,
557 struct pqos_mon_data *group_mon_data) {
564 if (NULL == ngroup->proc_pids_array) {
566 ": rdt_refresh_ngroup: \'%s\' uninitialized process pids array.",
572 DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group.",
575 proc_pids_t *proc_pids_array_prev = ngroup->proc_pids_array;
576 proc_pids_t *proc_pids_array_curr = NULL;
579 fetch_pids_for_procs(RDT_PROC_PATH, (const char **)ngroup->names,
580 ngroup->num_names, &proc_pids_array_curr);
582 if (0 != fetch_result) {
583 ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' failed to fetch PIDs.",
588 pids_list_t *new_pids = NULL;
589 pid_t *new_pids_array = NULL;
590 size_t new_pids_count = 0;
592 pids_list_t *lost_pids = NULL;
593 pid_t *lost_pids_array = NULL;
594 size_t lost_pids_count = 0;
596 for (size_t i = 0; i < ngroup->num_names; ++i) {
597 if (NULL == proc_pids_array_prev[i].pids &&
598 NULL == proc_pids_array_curr[i].pids)
600 int diff_result = pids_list_diff(
601 proc_pids_array_prev[i].pids, proc_pids_array_curr[i].pids, &new_pids,
602 &new_pids_count, &lost_pids, &lost_pids_count);
603 if (0 != diff_result) {
605 ": rdt_refresh_ngroup: \'%s\'. Error [%d] during PID diff.",
606 ngroup->desc, diff_result);
612 DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group, added: "
614 ngroup->desc, (unsigned)new_pids_count, (unsigned)lost_pids_count);
616 if (new_pids && new_pids_count > 0) {
617 new_pids_array = malloc(new_pids_count * sizeof(pid_t));
618 if (new_pids_array == NULL) {
619 ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Memory "
625 pids_list_to_array(new_pids_array, new_pids, new_pids_count);
627 /* no pids are monitored for this group yet: start monitoring */
628 if (0 == ngroup->monitored_pids_count) {
631 pqos_mon_start_pids(new_pids_count, new_pids_array, ngroup->events,
632 (void *)ngroup->desc, group_mon_data);
633 if (PQOS_RETVAL_OK == start_result) {
634 ngroup->monitored_pids_count = new_pids_count;
636 ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
637 "STARTING pids monitoring",
638 ngroup->desc, start_result);
640 goto pqos_error_recovery;
646 pqos_mon_add_pids(new_pids_count, new_pids_array, group_mon_data);
647 if (PQOS_RETVAL_OK == add_result)
648 ngroup->monitored_pids_count += new_pids_count;
651 ": rdt_refresh_ngroup: \'%s\'. Error [%d] while ADDING pids.",
652 ngroup->desc, add_result);
654 goto pqos_error_recovery;
659 if (lost_pids && lost_pids_count > 0) {
660 lost_pids_array = malloc(lost_pids_count * sizeof(pid_t));
661 if (lost_pids_array == NULL) {
662 ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Memory "
668 pids_list_to_array(lost_pids_array, lost_pids, lost_pids_count);
670 if (lost_pids_count == ngroup->monitored_pids_count) {
671 /* all pids for this group are lost: stop monitoring */
672 int stop_result = pqos_mon_stop(group_mon_data);
673 if (PQOS_RETVAL_OK != stop_result) {
674 ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\'. Error [%d] while "
675 "STOPPING monitoring",
676 ngroup->desc, stop_result);
678 goto pqos_error_recovery;
680 ngroup->monitored_pids_count = 0;
682 assert(lost_pids_count < ngroup->monitored_pids_count);
683 int remove_result = pqos_mon_remove_pids(lost_pids_count, lost_pids_array,
685 if (PQOS_RETVAL_OK == remove_result) {
686 ngroup->monitored_pids_count -= lost_pids_count;
689 ": rdt_refresh_ngroup: \'%s\'. Error [%d] while REMOVING pids.",
690 ngroup->desc, remove_result);
692 goto pqos_error_recovery;
697 if (new_pids_count > 0 || lost_pids_count > 0)
698 ngroup->proc_pids_array = proc_pids_array_curr;
704 * Resources might be temporary unavailable.
707 * Collectd will halt the reading thread for this
708 * plugin if it returns an error.
709 * Consecutive errors will be increasing the read period
710 * up to 1 day interval.
711 * On pqos error stop monitoring current group
712 * and reset the proc_pids array
713 * monitoring will be restarted on next collectd read cycle
715 DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' group RESET after error.",
717 pqos_mon_stop(group_mon_data);
718 for (size_t i = 0; i < ngroup->num_names; ++i) {
719 if (ngroup->proc_pids_array[i].pids)
720 pids_list_free(ngroup->proc_pids_array[i].pids);
722 sfree(ngroup->proc_pids_array);
724 initialize_proc_pids((const char **)ngroup->names, ngroup->num_names,
725 &ngroup->proc_pids_array);
726 ngroup->monitored_pids_count = 0;
729 if (ngroup->proc_pids_array == proc_pids_array_curr) {
730 assert(proc_pids_array_curr);
731 /* new list was successfully saved, free the old one */
732 for (size_t i = 0; i < ngroup->num_names; ++i)
733 if (proc_pids_array_prev[i].pids)
734 pids_list_free(proc_pids_array_prev[i].pids);
736 sfree(proc_pids_array_prev);
739 /* new list was not saved. Free the new list, keep the old one*/
740 for (size_t i = 0; i < ngroup->num_names; ++i)
741 if (proc_pids_array_curr[i].pids)
742 pids_list_free(proc_pids_array_curr[i].pids);
744 sfree(proc_pids_array_curr);
748 pids_list_free(new_pids);
751 free(new_pids_array);
754 pids_list_free(lost_pids);
757 free(lost_pids_array);
/* Reads and submits monitoring data for all process-name groups.
 *  - Skips the read when no name groups are configured.
 *  - Collects the monitoring data of every slot in g_rdt->pngroups whose
 *    group has a non-zero monitored_pids_count and polls them with
 *    pqos_mon_poll(); with no active groups the poll is skipped.
 *  - For each configured group that is being monitored, submits "bytes"/llc,
 *    "ipc" and "memory_bandwidth" local/remote values depending on the
 *    group's event mask.
 *  - Finally refreshes the PID set of every configured group with
 *    rdt_refresh_ngroup(); a refresh failure is logged and remembered in
 *    `ret'. */
762 static int read_pids_data() {
764 if (0 == g_rdt->num_ngroups) {
765 DEBUG(RDT_PLUGIN ": read_pids_data: not configured - PIDs read skipped");
769 DEBUG(RDT_PLUGIN ": read_pids_data: Scanning active groups");
770 struct pqos_mon_data *active_groups[RDT_MAX_NAMES_GROUPS] = {0};
771 size_t active_group_idx = 0;
772 for (size_t pngroups_idx = 0;
773 pngroups_idx < STATIC_ARRAY_SIZE(g_rdt->pngroups); ++pngroups_idx)
774 if (0 != g_rdt->ngroups[pngroups_idx].monitored_pids_count)
775 active_groups[active_group_idx++] = g_rdt->pngroups[pngroups_idx];
779 if (0 == active_group_idx) {
780 DEBUG(RDT_PLUGIN ": read_pids_data: no active groups - PIDs read skipped");
784 DEBUG(RDT_PLUGIN ": read_pids_data: PIDs data polling");
786 int poll_result = pqos_mon_poll(active_groups, active_group_idx);
787 if (poll_result != PQOS_RETVAL_OK) {
788 ERROR(RDT_PLUGIN ": read_pids_data: Failed to poll monitoring data for "
795 for (size_t i = 0; i < g_rdt->num_ngroups; i++) {
796 enum pqos_mon_event mbm_events =
797 (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW |
798 PQOS_MON_EVENT_RMEM_BW);
800 if (g_rdt->pngroups[i] == NULL ||
801 g_rdt->ngroups[i].monitored_pids_count == 0)
804 const struct pqos_event_values *pv = &g_rdt->pngroups[i]->values;
806 /* Submit only monitored events data */
808 if (g_rdt->ngroups[i].events & PQOS_MON_EVENT_L3_OCCUP)
809 rdt_submit_gauge(g_rdt->ngroups[i].desc, "bytes", "llc", pv->llc);
811 if (g_rdt->ngroups[i].events & PQOS_PERF_EVENT_IPC)
812 rdt_submit_gauge(g_rdt->ngroups[i].desc, "ipc", NULL, pv->ipc);
814 if (g_rdt->ngroups[i].events & mbm_events) {
815 rdt_submit_derive(g_rdt->ngroups[i].desc, "memory_bandwidth", "local",
816 pv->mbm_local_delta);
817 rdt_submit_derive(g_rdt->ngroups[i].desc, "memory_bandwidth", "remote",
818 pv->mbm_remote_delta);
823 rdt_dump_pids_data();
824 #endif /* COLLECT_DEBUG */
827 for (size_t i = 0; i < g_rdt->num_ngroups; i++) {
829 rdt_refresh_ngroup(&(g_rdt->ngroups[i]), g_rdt->pngroups[i]);
831 if (0 != refresh_result) {
832 ERROR(RDT_PLUGIN ": read_pids_data: NGroup %zu refresh failed. Error: %d",
835 /* refresh error will be escalated only if there were no
838 ret = refresh_result;
/* Prepares PID monitoring for every configured process-names group:
 * initializes the group's proc_pids_array with initialize_proc_pids() and
 * then performs a first rdt_refresh_ngroup(). Failures of either step are
 * reported with the group index and the error code. */
847 static void rdt_init_pids_monitoring() {
848 for (size_t group_idx = 0; group_idx < g_rdt->num_ngroups; group_idx++) {
850 * Each group must have not-null proc_pids array.
851 * Initial refresh is not mandatory for proper
852 * PIDs statistics detection.
854 rdt_name_group_t *ng = &g_rdt->ngroups[group_idx];
855 int init_result = initialize_proc_pids((const char **)ng->names,
856 ng->num_names, &ng->proc_pids_array);
857 if (0 != init_result) {
859 ": Initialization of proc_pids for group %zu failed. Error: %d",
860 group_idx, init_result);
864 int refresh_result = rdt_refresh_ngroup(&(g_rdt->ngroups[group_idx]),
865 g_rdt->pngroups[group_idx]);
866 if (0 != refresh_result)
867 ERROR(RDT_PLUGIN ": Initial refresh of group %zu failed. Error: %d",
868 group_idx, refresh_result);
871 #endif /* LIBPQOS2 */
/* Releases the core-group configuration (config_cores_cleanup) and frees
 * the monitoring data of all RDT_MAX_CORES pcgroups slots. */
873 static void rdt_free_cgroups(void) {
874 config_cores_cleanup(&g_rdt->cores);
875 for (int i = 0; i < RDT_MAX_CORES; i++) {
876 sfree(g_rdt->pcgroups[i]);
/* Builds the default core-group configuration: one group per core reported
 * in g_rdt->pqos_cpu, each holding a single core and described by the
 * decimal lcore number. Allocation failures are logged.
 * NOTE(review): the stored core id is the loop index `i' while the
 * description uses pqos_cpu->cores[i].lcore; the two agree only when lcore
 * equals the index - confirm. */
880 static int rdt_default_cgroups(void) {
881 unsigned num_cores = g_rdt->pqos_cpu->num_cores;
883 g_rdt->cores.cgroups = calloc(num_cores, sizeof(*(g_rdt->cores.cgroups)));
884 if (g_rdt->cores.cgroups == NULL) {
885 ERROR(RDT_PLUGIN ": Error allocating core groups array");
888 g_rdt->cores.num_cgroups = num_cores;
890 /* configure each core in separate group */
891 for (unsigned i = 0; i < num_cores; i++) {
892 core_group_t *cgroup = g_rdt->cores.cgroups + i;
893 char desc[DATA_MAX_NAME_LEN];
895 /* set core group info */
896 cgroup->cores = calloc(1, sizeof(*cgroup->cores));
897 if (cgroup->cores == NULL) {
898 ERROR(RDT_PLUGIN ": Error allocating cores array");
902 cgroup->num_cores = 1;
903 cgroup->cores[0] = i;
905 snprintf(desc, sizeof(desc), "%d", g_rdt->pqos_cpu->cores[i].lcore);
906 cgroup->desc = strdup(desc);
907 if (cgroup->desc == NULL) {
908 ERROR(RDT_PLUGIN ": Error allocating core group description");
/* Looks `core_id' up among the lcore ids of all cores listed in
 * g_rdt->pqos_cpu. The caller treats a zero result as "invalid core id";
 * the return statements are not visible in this excerpt. */
917 static int rdt_is_core_id_valid(unsigned int core_id) {
919 for (unsigned int i = 0; i < g_rdt->pqos_cpu->num_cores; i++)
920 if (core_id == g_rdt->pqos_cpu->cores[i].lcore)
/* Parses and validates the core groups given in config option `item'.
 * Steps visible here: config_cores_parse() fills g_rdt->cores; every core
 * id is checked with rdt_is_core_id_valid(); when no groups were
 * configured, default per-core groups are created with
 * rdt_default_cgroups(); the event mask is the union of all events
 * reported by the monitoring capability minus PQOS_PERF_EVENT_LLC_MISS;
 * each group is compared with all earlier groups via
 * config_cores_cmp_cgroups() to reject cores monitored in more than one
 * group; finally one zeroed monitoring-data struct is allocated per group. */
926 static int rdt_config_cgroups(oconfig_item_t *item) {
928 enum pqos_mon_event events = 0;
930 if (config_cores_parse(item, &g_rdt->cores) < 0) {
932 ERROR(RDT_PLUGIN ": Error parsing core groups configuration.");
935 n = g_rdt->cores.num_cgroups;
937 /* validate configured core id values */
938 for (size_t group_idx = 0; group_idx < n; group_idx++) {
939 core_group_t *cgroup = g_rdt->cores.cgroups + group_idx;
940 for (size_t core_idx = 0; core_idx < cgroup->num_cores; core_idx++) {
941 if (!rdt_is_core_id_valid(cgroup->cores[core_idx])) {
942 ERROR(RDT_PLUGIN ": Core group '%s' contains invalid core id '%u'",
943 cgroup->desc, cgroup->cores[core_idx]);
951 /* create default core groups if "Cores" config option is empty */
952 int ret = rdt_default_cgroups();
955 ERROR(RDT_PLUGIN ": Error creating default core groups configuration.");
960 ": No core groups configured. Default core groups created.");
963 /* Get all available events on this platform */
964 for (unsigned int i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++)
965 events |= g_rdt->cap_mon->u.mon->events[i].type;
967 events &= ~(PQOS_PERF_EVENT_LLC_MISS);
969 DEBUG(RDT_PLUGIN ": Number of cores in the system: %u",
970 g_rdt->pqos_cpu->num_cores);
971 DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events);
973 g_rdt->cores.num_cgroups = n;
974 for (int i = 0; i < n; i++) {
975 for (int j = 0; j < i; j++) {
977 found = config_cores_cmp_cgroups(&g_rdt->cores.cgroups[j],
978 &g_rdt->cores.cgroups[i]);
981 ERROR(RDT_PLUGIN ": Cannot monitor same cores in different groups.");
986 g_rdt->events[i] = events;
987 g_rdt->pcgroups[i] = calloc(1, sizeof(*g_rdt->pcgroups[i]));
988 if (g_rdt->pcgroups[i] == NULL) {
990 ERROR(RDT_PLUGIN ": Failed to allocate memory for monitoring data.");
/* Log callback handed to libpqos through pqos_config.callback_log in
 * rdt_preinit(): forwards `msg' to the collectd DEBUG log. `context' and
 * `size' are not used. */
998 static void rdt_pqos_log(void *context, const size_t size, const char *msg) {
999 DEBUG(RDT_PLUGIN ": %s", msg);
/* One-time setup shared by the config and init callbacks.
 * Does nothing further when g_rdt already exists. Otherwise allocates the
 * context, initializes libpqos with rdt_pqos_log as log callback (the
 * excerpt shows a resctrl-monitoring and an MSR interface variant, plus a
 * retry with PQOS_INTER_MSR when pqos_init() returns PQOS_RETVAL_INTER),
 * stores the interface in g_interface and retrieves the capability/CPU
 * information and the monitoring capability. Failures jump to the
 * rdt_preinit_error1 / rdt_preinit_error2 labels.
 * NOTE(review): the preprocessor conditionals, the error labels and the
 * return statements are not visible in this excerpt. */
1002 static int rdt_preinit(void) {
1005 if (g_rdt != NULL) {
1006 /* already initialized if config callback was called before init callback */
1010 g_rdt = calloc(1, sizeof(*g_rdt));
1011 if (g_rdt == NULL) {
1012 ERROR(RDT_PLUGIN ": Failed to allocate memory for rdt context.");
1016 struct pqos_config pqos = {.fd_log = -1,
1017 .callback_log = rdt_pqos_log,
1018 .context_log = NULL,
1021 .interface = PQOS_INTER_OS_RESCTRL_MON};
1022 DEBUG(RDT_PLUGIN ": Initializing PQoS with RESCTRL interface");
1024 .interface = PQOS_INTER_MSR};
1025 DEBUG(RDT_PLUGIN ": Initializing PQoS with MSR interface");
1028 ret = pqos_init(&pqos);
1029 DEBUG(RDT_PLUGIN ": PQoS initialization result: [%d]", ret);
1032 if (ret == PQOS_RETVAL_INTER) {
1033 pqos.interface = PQOS_INTER_MSR;
1034 DEBUG(RDT_PLUGIN ": Initializing PQoS with MSR interface");
1035 ret = pqos_init(&pqos);
1036 DEBUG(RDT_PLUGIN ": PQoS initialization result: [%d]", ret);
1040 if (ret != PQOS_RETVAL_OK) {
1041 ERROR(RDT_PLUGIN ": Error initializing PQoS library!");
1042 goto rdt_preinit_error1;
1045 g_interface = pqos.interface;
1047 ret = pqos_cap_get(&g_rdt->pqos_cap, &g_rdt->pqos_cpu);
1048 if (ret != PQOS_RETVAL_OK) {
1049 ERROR(RDT_PLUGIN ": Error retrieving PQoS capabilities.");
1050 goto rdt_preinit_error2;
1053 ret = pqos_cap_get_type(g_rdt->pqos_cap, PQOS_CAP_TYPE_MON, &g_rdt->cap_mon);
1054 if (ret == PQOS_RETVAL_PARAM) {
1055 ERROR(RDT_PLUGIN ": Error retrieving monitoring capabilities.");
1056 goto rdt_preinit_error2;
1059 if (g_rdt->cap_mon == NULL) {
1062 ": Monitoring capability not detected. Nothing to do for the plugin.");
1063 goto rdt_preinit_error2;
1066 /* Reset pqos monitoring groups registers */
/* Complex-config callback of the plugin.
 * Runs rdt_preinit() first, then walks the children of `ci':
 *  - "Cores"     -> rdt_config_cgroups()
 *  - "Processes" -> rdt_config_ngroups(); rejected unless g_interface is
 *                   PQOS_INTER_OS_RESCTRL_MON, and reported as unsupported
 *                   when built without libpqos 2.0 or newer
 *  - anything else is reported as an unknown parameter.
 * Failures set g_state to CONFIGURATION_ERROR.
 * NOTE(review): keys are matched with strncasecmp() limited to the length
 * of the literal, i.e. as a case-insensitive prefix match - a key such as
 * "CoresXyz" would also be accepted; confirm this is intended. */
1080 static int rdt_config(oconfig_item_t *ci) {
1081 if (rdt_preinit() != 0) {
1082 g_state = CONFIGURATION_ERROR;
1083 /* if we return -1 at this point collectd
1084 reports a failure in configuration and
1090 for (int i = 0; i < ci->children_num; i++) {
1091 oconfig_item_t *child = ci->children + i;
1093 if (strncasecmp("Cores", child->key, (size_t)strlen("Cores")) == 0) {
1094 if (rdt_config_cgroups(child) != 0) {
1095 g_state = CONFIGURATION_ERROR;
1096 /* if we return -1 at this point collectd
1097 reports a failure in configuration and
1105 #endif /* COLLECT_DEBUG */
1106 } else if (strncasecmp("Processes", child->key,
1107 (size_t)strlen("Processes")) == 0) {
1109 if (g_interface != PQOS_INTER_OS_RESCTRL_MON) {
1110 ERROR(RDT_PLUGIN ": Configuration parameter \"%s\" not supported. "
1111 "Resctrl monitoring is needed for PIDs monitoring.",
1113 g_state = CONFIGURATION_ERROR;
1114 /* if we return -1 at this point collectd
1115 reports a failure in configuration and
1121 if (rdt_config_ngroups(g_rdt, child) != 0) {
1122 g_state = CONFIGURATION_ERROR;
1123 /* if we return -1 at this point collectd
1124 reports a failure in configuration and
1132 #endif /* COLLECT_DEBUG */
1133 #else /* !LIBPQOS2 */
1134 ERROR(RDT_PLUGIN ": Configuration parameter \"%s\" not supported, please "
1135 "recompile collectd with libpqos version 2.0 or newer.",
1137 #endif /* LIBPQOS2 */
1139 ERROR(RDT_PLUGIN ": Unknown configuration parameter \"%s\".", child->key);
/* Reads and submits monitoring data for all core groups.
 * Skips the read when no core groups are configured; otherwise polls all
 * groups with a single pqos_mon_poll() call and, for each group, submits
 * "bytes"/llc, "ipc" and "memory_bandwidth" local/remote values depending
 * on the group's event mask. A poll failure is logged with the error
 * code. */
1146 static int read_cores_data() {
1148 if (0 == g_rdt->cores.num_cgroups) {
1149 DEBUG(RDT_PLUGIN ": read_cores_data: not configured - Cores read skipped");
1152 DEBUG(RDT_PLUGIN ": read_cores_data: Cores data poll");
1155 pqos_mon_poll(&g_rdt->pcgroups[0], (unsigned)g_rdt->cores.num_cgroups);
1156 if (ret != PQOS_RETVAL_OK) {
1157 ERROR(RDT_PLUGIN ": read_cores_data: Failed to poll monitoring data for "
1158 "cores. Error [%d].",
1163 for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) {
1164 core_group_t *cgroup = g_rdt->cores.cgroups + i;
1165 enum pqos_mon_event mbm_events =
1166 (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW |
1167 PQOS_MON_EVENT_RMEM_BW);
1169 const struct pqos_event_values *pv = &g_rdt->pcgroups[i]->values;
1171 /* Submit only monitored events data */
1173 if (g_rdt->events[i] & PQOS_MON_EVENT_L3_OCCUP)
1174 rdt_submit_gauge(cgroup->desc, "bytes", "llc", pv->llc);
1176 if (g_rdt->events[i] & PQOS_PERF_EVENT_IPC)
1177 rdt_submit_gauge(cgroup->desc, "ipc", NULL, pv->ipc);
1179 if (g_rdt->events[i] & mbm_events) {
1180 rdt_submit_derive(cgroup->desc, "memory_bandwidth", "local",
1181 pv->mbm_local_delta);
1182 rdt_submit_derive(cgroup->desc, "memory_bandwidth", "remote",
1183 pv->mbm_remote_delta);
1188 rdt_dump_cores_data();
1189 #endif /* COLLECT_DEBUG */
/* Read callback of the plugin. Reports an error when the plugin context
 * does not exist; otherwise reads core-group data and process-group data
 * (both reads are performed before either result is checked) and returns
 * the first non-zero result, cores first. */
1194 static int rdt_read(__attribute__((unused)) user_data_t *ud) {
1196 if (g_rdt == NULL) {
1197 ERROR(RDT_PLUGIN ": rdt_read: plugin not initialized.");
1201 int cores_read_result = read_cores_data();
1204 int pids_read_result = read_pids_data();
1205 #endif /* LIBPQOS2 */
1207 if (0 != cores_read_result)
1208 return cores_read_result;
1211 if (0 != pids_read_result)
1212 return pids_read_result;
1213 #endif /* LIBPQOS2 */
/* Starts libpqos monitoring for every configured core group, passing the
 * group's cores, its event mask, its description as context and its
 * monitoring-data struct to pqos_mon_start(). A non-OK status is reported
 * together with the group description. */
1218 static void rdt_init_cores_monitoring() {
1219 for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) {
1220 core_group_t *cg = g_rdt->cores.cgroups + i;
1222 int mon_start_result =
1223 pqos_mon_start(cg->num_cores, cg->cores, g_rdt->events[i],
1224 (void *)cg->desc, g_rdt->pcgroups[i]);
1226 if (mon_start_result != PQOS_RETVAL_OK)
1228 ": Error starting cores monitoring group %s (pqos status=%d)",
1229 cg->desc, mon_start_result);
/* Init callback of the plugin. Bails out early when configuration already
 * failed (g_state == CONFIGURATION_ERROR), makes sure pre-initialization
 * has run (propagating its error code), then starts core-group monitoring
 * and process-group monitoring. */
1233 static int rdt_init(void) {
1235 if (g_state == CONFIGURATION_ERROR)
1238 int rdt_preinint_result = rdt_preinit();
1239 if (rdt_preinint_result != 0)
1240 return rdt_preinint_result;
1242 rdt_init_cores_monitoring();
1244 rdt_init_pids_monitoring();
1245 #endif /* LIBPQOS2 */
/* Shutdown callback of the plugin: stops libpqos monitoring for every core
 * group and every process-names group, reports a failed library shutdown
 * and frees the process-name groups.
 * NOTE(review): the call that produces `ret' and the remaining cleanup are
 * not visible in this excerpt. */
1250 static int rdt_shutdown(void) {
1253 DEBUG(RDT_PLUGIN ": rdt_shutdown.");
1258 /* Stop monitoring cores */
1259 for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) {
1260 pqos_mon_stop(g_rdt->pcgroups[i]);
1263 /* Stop pids monitoring */
1265 for (size_t i = 0; i < g_rdt->num_ngroups; i++)
1266 pqos_mon_stop(g_rdt->pngroups[i]);
1270 if (ret != PQOS_RETVAL_OK)
1271 ERROR(RDT_PLUGIN ": Error shutting down PQoS library.");
1274 rdt_free_ngroups(g_rdt);
1275 #endif /* LIBPQOS2 */
/* Plugin entry point: registers the init, complex-config, complex-read and
 * shutdown callbacks under the RDT_PLUGIN name. */
1281 void module_register(void) {
1282 plugin_register_init(RDT_PLUGIN, rdt_init);
1283 plugin_register_complex_config(RDT_PLUGIN, rdt_config);
1284 plugin_register_complex_read(NULL, RDT_PLUGIN, rdt_read, 0, NULL);
1285 plugin_register_shutdown(RDT_PLUGIN, rdt_shutdown);