2 * collectd - src/intel_rdt.c
4 * Copyright(c) 2016-2018 Intel Corporation. All rights reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Serhiy Pshyk <serhiyx.pshyk@intel.com>
26 * Starzyk, Mateusz <mateuszx.starzyk@intel.com>
27 * Wojciech Andralojc <wojciechx.andralojc@intel.com>
31 #include "utils/common/common.h"
32 #include "utils/config_cores/config_cores.h"
35 #define RDT_PLUGIN "intel_rdt"
38 * In future: Start monitoring for PID group. For perf grouping will be added.
39 * Currently: Start monitoring only for the first PID.
41 __attribute__((unused)) static int
42 pqos_mon_start_pids(const unsigned num_pids, const pid_t *pids,
43 const enum pqos_mon_event event, void *context,
44 struct pqos_mon_data *group) {
48 return pqos_mon_start_pid(pids[0], event, context, group);
52 * In future: Add PIDs to the monitoring group. Supported for resctrl monitoring
54 * Currently: Does nothing.
56 __attribute__((unused)) static int
57 pqos_mon_add_pids(const unsigned num_pids, const pid_t *pids, void *context,
58 struct pqos_mon_data *group) {
59 return PQOS_RETVAL_OK;
63 * In future: Remove PIDs from the monitoring group. Supported for resctrl
65 * Currently: Does nothing.
67 __attribute__((unused)) static int
68 pqos_mon_remove_pids(const unsigned num_pids, const pid_t *pids, void *context,
69 struct pqos_mon_data *group) {
70 return PQOS_RETVAL_OK;
#define RDT_PLUGIN "intel_rdt"

/* Upper bounds used to size the per-core tables of the plugin context. */
#define RDT_MAX_SOCKETS 8
#define RDT_MAX_SOCKET_CORES 64
#define RDT_MAX_CORES (RDT_MAX_SOCKET_CORES * RDT_MAX_SOCKETS)

/*
 * Process name inside comm file is limited to 16 chars.
 * More info here: http://man7.org/linux/man-pages/man5/proc.5.html
 */
#define RDT_MAX_NAME_LEN 16
/* Upper bound used to size the process-name group tables. */
#define RDT_MAX_NAMES_GROUPS 64

/* Default procfs root handed to the PID lookup code. */
#define RDT_PROC_PATH "/proc"

/*
 * Helper typedef for process name array.
 * Extra 1 char is added for string null termination.
 */
typedef char proc_comm_t[RDT_MAX_NAME_LEN + 1];
98 /* Linked one-way list of pids. */
99 typedef struct pids_list_s {
101 struct pids_list_s *next;
104 /* Holds process name and list of pids assigned to that name */
105 typedef struct proc_pids_s {
106 proc_comm_t proccess_name;
110 struct rdt_name_group_s {
114 proc_pids_t *proc_pids_array;
115 enum pqos_mon_event events;
117 typedef struct rdt_name_group_s rdt_name_group_t;
120 core_groups_list_t cores;
121 enum pqos_mon_event events[RDT_MAX_CORES];
122 struct pqos_mon_data *pcgroups[RDT_MAX_CORES];
123 rdt_name_group_t ngroups[RDT_MAX_NAMES_GROUPS];
124 struct pqos_mon_data *pngroups[RDT_MAX_NAMES_GROUPS];
126 const struct pqos_cpuinfo *pqos_cpu;
127 const struct pqos_cap *pqos_cap;
128 const struct pqos_capability *cap_mon;
130 typedef struct rdt_ctx_s rdt_ctx_t;
132 static rdt_ctx_t *g_rdt;
134 static rdt_config_status g_state = UNKNOWN;
136 static int isdupstr(const char *names[], const size_t size, const char *name) {
137 for (size_t i = 0; i < size; i++)
138 if (strncmp(names[i], name, (size_t)RDT_MAX_NAME_LEN) == 0)
149 * Converts string representing list of strings into array of strings.
151 * name,name1,name2,name3
154 * `str_list' String representing list of strings.
155 * `names' Array to put extracted strings into.
156 * `names_num' Variable to put number of extracted strings.
159 * Number of elements placed into names.
161 static int strlisttoarray(char *str_list, char ***names, size_t *names_num) {
162 char *saveptr = NULL;
164 if (str_list == NULL || names == NULL)
168 char *token = strtok_r(str_list, ",", &saveptr);
174 while (isspace(*token))
180 if (!(isdupstr((const char **)*names, *names_num, token)))
181 if (0 != strarray_add(names, names_num, token)) {
182 ERROR(RDT_PLUGIN ": Error allocating process name string");
195 * Function to compare names in two name groups.
198 * `ng_a' Pointer to name group a.
199 * `ng_b' Pointer to name group b.
202 * 1 if both groups contain the same names
203 * 0 if none of their names match
204 * -1 if some but not all names match
206 static int ngroup_cmp(const rdt_name_group_t *ng_a,
207 const rdt_name_group_t *ng_b) {
210 assert(ng_a != NULL);
211 assert(ng_b != NULL);
213 const size_t sz_a = (unsigned)ng_a->num_names;
214 const size_t sz_b = (unsigned)ng_b->num_names;
215 const char **tab_a = (const char **)ng_a->names;
216 const char **tab_b = (const char **)ng_b->names;
218 for (size_t i = 0; i < sz_a; i++) {
219 for (size_t j = 0; j < sz_b; j++)
220 if (strncmp(tab_a[i], tab_b[j], (size_t)RDT_MAX_NAME_LEN) == 0)
223 /* if no names are the same */
226 /* if group contains same names */
227 if (sz_a == sz_b && sz_b == (size_t)found)
229 /* if not all names are the same */
238 * Function to set the descriptions and names for each process names group.
239 * Takes a config option containing list of strings that are used to set
240 * process group values.
243 * `item' Config option containing process names groups.
244 * `groups' Table of process name groups to set values in.
245 * `max_groups' Maximum number of process name groups allowed.
248 * On success, the number of name groups set up. On error, appropriate
249 * negative error value.
251 static int oconfig_to_ngroups(const oconfig_item_t *item,
252 rdt_name_group_t *groups,
253 const size_t max_groups) {
256 assert(groups != NULL);
257 assert(max_groups > 0);
258 assert(item != NULL);
260 for (int j = 0; j < item->values_num; j++) {
262 char value[DATA_MAX_NAME_LEN];
264 if ((item->values[j].value.string == NULL) ||
265 (strlen(item->values[j].value.string) == 0))
268 sstrncpy(value, item->values[j].value.string, sizeof(value));
270 ret = strlisttoarray(value, &groups[index].names, &groups[index].num_names);
271 if (ret != 0 || groups[index].num_names == 0) {
272 ERROR(RDT_PLUGIN ": Error parsing process names group (%s)",
273 item->values[j].value.string);
277 /* set group description info */
278 groups[index].desc = sstrdup(item->values[j].value.string);
279 if (groups[index].desc == NULL) {
280 ERROR(RDT_PLUGIN ": Error allocating name group description");
284 groups[index].proc_pids_array = NULL;
288 if (index >= (const int)max_groups) {
289 WARNING(RDT_PLUGIN ": Too many process names groups configured");
298 static void rdt_dump_cgroups(void) {
299 char cores[RDT_MAX_CORES * 4];
304 DEBUG(RDT_PLUGIN ": Core Groups Dump");
305 DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->cores.num_cgroups);
307 for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) {
308 core_group_t *cgroup = g_rdt->cores.cgroups + i;
310 memset(cores, 0, sizeof(cores));
311 for (size_t j = 0; j < cgroup->num_cores; j++) {
312 snprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d",
316 DEBUG(RDT_PLUGIN ": group[%zu]:", i);
317 DEBUG(RDT_PLUGIN ": description: %s", cgroup->desc);
318 DEBUG(RDT_PLUGIN ": cores: %s", cores);
319 DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->events[i]);
325 static void rdt_dump_ngroups(void) {
327 char names[DATA_MAX_NAME_LEN];
332 DEBUG(RDT_PLUGIN ": Process Names Groups Dump");
333 DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->num_ngroups);
335 for (size_t i = 0; i < g_rdt->num_ngroups; i++) {
336 memset(names, 0, sizeof(names));
337 for (size_t j = 0; j < g_rdt->ngroups[i].num_names; j++)
338 snprintf(names + strlen(names), sizeof(names) - strlen(names) - 1, " %s",
339 g_rdt->ngroups[i].names[j]);
341 DEBUG(RDT_PLUGIN ": group[%d]:", (int)i);
342 DEBUG(RDT_PLUGIN ": description: %s", g_rdt->ngroups[i].desc);
343 DEBUG(RDT_PLUGIN ": process names:%s", names);
344 DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->ngroups[i].events);
/* Converts a byte count into kilobytes (1 KB = 1024 bytes). */
static inline double bytes_to_kb(const double bytes) {
  const double bytes_per_kb = 1024.0;

  return bytes / bytes_per_kb;
}
/* Converts a byte count into megabytes (1 MB = 1024 * 1024 bytes). */
static inline double bytes_to_mb(const double bytes) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  return bytes / bytes_per_mb;
}
356 static void rdt_dump_data(void) {
358 * CORE - monitored group of cores
359 * NAME - monitored group of processes
360 * RMID - Resource Monitoring ID associated with the monitored group
361 * LLC - last level cache occupancy
362 * MBL - local memory bandwidth
363 * MBR - remote memory bandwidth
365 DEBUG(" CORE RMID LLC[KB] MBL[MB] MBR[MB]");
366 for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) {
368 const struct pqos_event_values *pv = &g_rdt->pcgroups[i]->values;
370 double llc = bytes_to_kb(pv->llc);
371 double mbr = bytes_to_mb(pv->mbm_remote_delta);
372 double mbl = bytes_to_mb(pv->mbm_local_delta);
374 DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdt->cores.cgroups[i].desc,
375 g_rdt->pcgroups[i]->poll_ctx[0].rmid, llc, mbl, mbr);
379 char pids[DATA_MAX_NAME_LEN];
380 for (size_t i = 0; i < g_rdt->num_ngroups; ++i) {
381 memset(pids, 0, sizeof(pids));
382 for (size_t j = 0; j < g_rdt->ngroups[i].num_names; ++j) {
383 pids_list_t *list = g_rdt->ngroups[i].proc_pids_array[j].pids;
384 while (list != NULL) {
385 snprintf(pids + strlen(pids), sizeof(pids) - strlen(pids) - 1, " %u",
390 DEBUG(" [%s] %s", g_rdt->ngroups[i].desc, pids);
393 DEBUG(" NAME RMID LLC[KB] MBL[MB] MBR[MB]");
394 for (size_t i = 0; i < g_rdt->num_ngroups; i++) {
396 if (g_rdt->pngroups[i]->poll_ctx == NULL)
399 const struct pqos_event_values *pv = &g_rdt->pngroups[i]->values;
401 double llc = bytes_to_kb(pv->llc);
402 double mbr = bytes_to_mb(pv->mbm_remote_delta);
403 double mbl = bytes_to_mb(pv->mbm_local_delta);
405 DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdt->ngroups[i].desc,
406 g_rdt->pngroups[i]->poll_ctx[0].rmid, llc, mbl, mbr);
409 #endif /* COLLECT_DEBUG */
411 static void rdt_free_cgroups(void) {
412 config_cores_cleanup(&g_rdt->cores);
413 for (int i = 0; i < RDT_MAX_CORES; i++) {
414 sfree(g_rdt->pcgroups[i]);
418 static int pids_list_free(pids_list_t *list) {
421 pids_list_t *current = list;
422 while (current != NULL) {
423 pids_list_t *previous = current;
424 current = current->next;
430 static void rdt_free_ngroups(void) {
431 for (int i = 0; i < RDT_MAX_NAMES_GROUPS; i++) {
432 DEBUG(RDT_PLUGIN ": Freeing \'%s\' group\'s data...",
433 g_rdt->ngroups[i].desc);
434 sfree(g_rdt->ngroups[i].desc);
435 strarray_free(g_rdt->ngroups[i].names, g_rdt->ngroups[i].num_names);
437 if (g_rdt->ngroups[i].proc_pids_array) {
438 for (size_t j = 0; j < g_rdt->ngroups[i].num_names; ++j) {
439 if (NULL == g_rdt->ngroups[i].proc_pids_array[j].pids)
441 pids_list_free(g_rdt->ngroups[i].proc_pids_array[j].pids);
444 sfree(g_rdt->ngroups[i].proc_pids_array);
447 g_rdt->ngroups[i].num_names = 0;
448 sfree(g_rdt->pngroups[i]);
452 static int rdt_default_cgroups(void) {
453 unsigned num_cores = g_rdt->pqos_cpu->num_cores;
455 g_rdt->cores.cgroups = calloc(num_cores, sizeof(*g_rdt->cores.cgroups));
456 if (g_rdt->cores.cgroups == NULL) {
457 ERROR(RDT_PLUGIN ": Error allocating core groups array");
460 g_rdt->cores.num_cgroups = num_cores;
462 /* configure each core in separate group */
463 for (unsigned i = 0; i < num_cores; i++) {
464 core_group_t *cgroup = g_rdt->cores.cgroups + i;
465 char desc[DATA_MAX_NAME_LEN];
467 /* set core group info */
468 cgroup->cores = calloc(1, sizeof(*cgroup->cores));
469 if (cgroup->cores == NULL) {
470 ERROR(RDT_PLUGIN ": Error allocating cores array");
474 cgroup->num_cores = 1;
475 cgroup->cores[0] = i;
477 snprintf(desc, sizeof(desc), "%d", g_rdt->pqos_cpu->cores[i].lcore);
478 cgroup->desc = strdup(desc);
479 if (cgroup->desc == NULL) {
480 ERROR(RDT_PLUGIN ": Error allocating core group description");
489 static int rdt_is_core_id_valid(unsigned int core_id) {
491 for (unsigned int i = 0; i < g_rdt->pqos_cpu->num_cores; i++)
492 if (core_id == g_rdt->pqos_cpu->cores[i].lcore)
498 static int rdt_is_proc_name_valid(const char *name) {
501 unsigned len = strlen(name);
502 if (len > 0 && len <= RDT_MAX_NAME_LEN)
506 ": Process name \'%s\' is too long. Max supported len is %d chars.",
507 name, RDT_MAX_NAME_LEN);
514 static int rdt_config_cgroups(oconfig_item_t *item) {
516 enum pqos_mon_event events = 0;
518 if (config_cores_parse(item, &g_rdt->cores) < 0) {
520 ERROR(RDT_PLUGIN ": Error parsing core groups configuration.");
523 n = g_rdt->cores.num_cgroups;
525 /* validate configured core id values */
526 for (size_t group_idx = 0; group_idx < n; group_idx++) {
527 core_group_t *cgroup = g_rdt->cores.cgroups + group_idx;
528 for (size_t core_idx = 0; core_idx < cgroup->num_cores; core_idx++) {
529 if (!rdt_is_core_id_valid(cgroup->cores[core_idx])) {
530 ERROR(RDT_PLUGIN ": Core group '%s' contains invalid core id '%u'",
531 cgroup->desc, cgroup->cores[core_idx]);
539 /* create default core groups if "Cores" config option is empty */
540 int ret = rdt_default_cgroups();
543 ERROR(RDT_PLUGIN ": Error creating default core groups configuration.");
548 ": No core groups configured. Default core groups created.");
551 /* Get all available events on this platform */
552 for (unsigned int i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++)
553 events |= g_rdt->cap_mon->u.mon->events[i].type;
555 events &= ~(PQOS_PERF_EVENT_LLC_MISS);
557 DEBUG(RDT_PLUGIN ": Number of cores in the system: %u",
558 g_rdt->pqos_cpu->num_cores);
559 DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events);
561 g_rdt->cores.num_cgroups = n;
562 for (int i = 0; i < n; i++) {
563 for (int j = 0; j < i; j++) {
565 found = config_cores_cmp_cgroups(&g_rdt->cores.cgroups[j],
566 &g_rdt->cores.cgroups[i]);
569 ERROR(RDT_PLUGIN ": Cannot monitor same cores in different groups.");
574 g_rdt->events[i] = events;
575 g_rdt->pcgroups[i] = calloc(1, sizeof(*g_rdt->pcgroups[i]));
576 if (g_rdt->pcgroups[i] == NULL) {
578 ERROR(RDT_PLUGIN ": Failed to allocate memory for monitoring data.");
586 static int rdt_config_ngroups(const oconfig_item_t *item) {
588 enum pqos_mon_event events = 0;
591 DEBUG(RDT_PLUGIN ": ngroups_config: Invalid argument.");
595 DEBUG(RDT_PLUGIN ": Process names groups [%d]:", item->values_num);
596 for (int j = 0; j < item->values_num; j++) {
597 if (item->values[j].type != OCONFIG_TYPE_STRING) {
599 ": given process names group value is not a string [idx=%d]",
603 DEBUG(RDT_PLUGIN ": [%d]: %s", j, item->values[j].value.string);
606 n = oconfig_to_ngroups(item, g_rdt->ngroups, RDT_MAX_NAMES_GROUPS);
609 ERROR(RDT_PLUGIN ": Error parsing process name groups configuration.");
613 /* validate configured process name values */
614 for (int group_idx = 0; group_idx < n; group_idx++) {
615 for (size_t name_idx = 0; name_idx < g_rdt->ngroups[group_idx].num_names;
617 if (!rdt_is_proc_name_valid(g_rdt->ngroups[group_idx].names[name_idx])) {
618 ERROR(RDT_PLUGIN ": Process name group '%s' contains invalid name '%s'",
619 g_rdt->ngroups[group_idx].desc,
620 g_rdt->ngroups[group_idx].names[name_idx]);
628 ERROR(RDT_PLUGIN ": Empty process name groups configured.");
632 /* Get all available events on this platform */
633 for (unsigned i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++)
634 events |= g_rdt->cap_mon->u.mon->events[i].type;
636 events &= ~(PQOS_PERF_EVENT_LLC_MISS);
638 DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events);
640 g_rdt->num_ngroups = n;
641 for (int i = 0; i < n; i++) {
642 for (int j = 0; j < i; j++) {
643 int found = ngroup_cmp(&g_rdt->ngroups[j], &g_rdt->ngroups[i]);
647 ": Cannot monitor same process name in different groups.");
652 g_rdt->ngroups[i].events = events;
653 g_rdt->pngroups[i] = calloc(1, sizeof(*g_rdt->pngroups[i]));
654 if (g_rdt->pngroups[i] == NULL) {
657 ": Failed to allocate memory for process name monitoring data.");
670 * Adds pid at the end of the pids list.
671 * Allocates memory for new pid element, it is up to user to free it.
674 * `list' Head of target pids_list.
675 * `pid' Pid to be added.
678 * On success, returns 0.
679 * -1 on memory allocation error.
681 static int pids_list_add_pid(pids_list_t **list, const pid_t pid) {
684 pids_list_t *new_element = calloc(1, sizeof(*new_element));
686 if (new_element == NULL) {
687 ERROR(RDT_PLUGIN ": Alloc error\n");
690 new_element->pid = pid;
691 new_element->next = NULL;
693 pids_list_t **current = list;
694 while (*current != NULL) {
695 current = &((*current)->next);
697 *current = new_element;
703 * pids_list_contains_pid
706 * Tests if pids list contains specific pid.
709 * `list' Head of pids_list.
710 * `pid' Pid to be searched for.
713 * If PID found in list, returns 1,
714 * Otherwise returns 0.
716 static int pids_list_contains_pid(pids_list_t *list, const pid_t pid) {
719 pids_list_t *current = list;
720 while (current != NULL) {
721 if (current->pid == pid)
723 current = current->next;
730 * pids_list_add_pids_list
733 * Adds pids list at the end of the pids list.
734 * Allocates memory for new pid elements, it is up to user to free it.
735 * Increases dst_num by a number of added PIDs.
738 * `dst' Head of target PIDs list.
739 * `src' Head of source PIDs list.
740 * `dst_num' Variable to be increased by a number of appended PIDs.
743 * On success, returns 0.
744 * -1 on memory allocation error.
746 static int pids_list_add_pids_list(pids_list_t **dst, pids_list_t *src,
752 pids_list_t *current = src;
755 while (current != NULL) {
756 ret = pids_list_add_pid(dst, current->pid);
761 current = current->next;
772 * Reads process name from given pid directory.
773 * Strips new-line character (\n).
776 * `procfs_path` Path to systems proc directory (e.g. /proc)
777 * `pid_entry' Dirent for PID directory
778 * `name' Output buffer for process name, recommended proc_comm.
779 * `out_size' Output buffer size, recommended sizeof(proc_comm)
782 * On success, the number of read bytes (includes stripped \n).
783 * -1 on file open error
785 static int read_proc_name(const char *procfs_path,
786 const struct dirent *pid_entry, char *name,
787 const size_t out_size) {
792 memset(name, 0, out_size);
794 const char *comm_file_name = "comm";
796 char *path = ssnprintf_alloc("%s/%s/%s", procfs_path, pid_entry->d_name,
799 FILE *f = fopen(path, "r");
801 ERROR(RDT_PLUGIN ": Failed to open comm file, error: %d\n", errno);
805 size_t read_length = fread(name, sizeof(char), out_size, f);
808 /* strip new line ending */
809 char *newline = strchr(name, '\n');
/*
 * NAME
 *   get_pid_number
 *
 * DESCRIPTION
 *   Gets pid number for given /proc/pid directory entry or
 *   returns error if input directory does not hold PID information.
 *   `pid' is written only when the conversion succeeds.
 *
 * PARAMETERS
 *   `entry'  Dirent for PID directory
 *   `pid'    PID number to be filled
 *
 * RETURN VALUE
 *   0 on success. Negative number on error:
 *   -1: invalid argument or given entry is not a directory
 *   -2: PID conversion error
 */
static int get_pid_number(struct dirent *entry, pid_t *pid) {
  char *tmp_end; /* used for strtoul error check */

  if (pid == NULL || entry == NULL)
    return -1;

  if (entry->d_type != DT_DIR)
    return -1;

  /* strtoul() accepts an empty string, leading blanks and a sign; a PID
   * directory name must start with a digit. */
  if (!isdigit((unsigned char)entry->d_name[0]))
    return -2;

  /* trying to get pid number from directory name */
  errno = 0;
  const unsigned long value = strtoul(entry->d_name, &tmp_end, 10);
  if (*tmp_end != '\0' || errno == ERANGE)
    return -2; /* conversion failed, not proc-pid */

  /* Reject values that do not survive the narrowing to pid_t. */
  const pid_t converted = (pid_t)value;
  if (converted < 0 || (unsigned long)converted != value)
    return -2;

  /* all checks passed, marking as success */
  *pid = converted;
  return 0;
}
854 * fetch_pids_for_procs
857 * Finds PIDs matching given process's names.
858 * Searches all PID directories in /proc fs and
859 * allocates memory for proc_pids structs, it is up to user to free it.
860 * Output array will have same element count as input array.
863 * `procfs_path' Path to systems proc directory (e.g. /proc)
864 * `procs' Array of null-terminated strings with
865 * process' names to search for
866 * `procs_size' procs array element count
867 * `proc_pids_array' Address of pointer, under which new
868 * array of proc_pids will be allocated. Must be NULL.
871 * 0 on success. Negative number on error:
872 * -1: could not open /proc dir
874 __attribute__((unused)) /* TODO: remove this attribute when PID monitoring is
877 fetch_pids_for_procs(const char *procfs_path, const char **procs_names_array,
878 const size_t procs_names_array_size,
879 proc_pids_t **proc_pids_array) {
881 assert(procs_names_array);
882 assert(procs_names_array_size);
883 assert(proc_pids_array);
884 assert(NULL == *proc_pids_array);
886 DIR *proc_dir = opendir(procfs_path);
887 if (proc_dir == NULL) {
888 ERROR(RDT_PLUGIN ": Could not open %s directory, error: %d", procfs_path,
893 /* Copy procs names to output array. Initialize pids list with NULL value. */
895 calloc(procs_names_array_size, sizeof(**proc_pids_array));
896 for (size_t i = 0; i < procs_names_array_size; ++i) {
897 sstrncpy((*proc_pids_array)[i].proccess_name, procs_names_array[i],
898 STATIC_ARRAY_SIZE((*proc_pids_array)[i].proccess_name));
899 (*proc_pids_array)[i].pids = NULL;
902 /* Go through procfs and find PIDS and their comms */
903 struct dirent *entry;
904 while ((entry = readdir(proc_dir)) != NULL) {
907 int pid_conversion = get_pid_number(entry, &pid);
908 if (pid_conversion < 0)
913 read_proc_name(procfs_path, entry, comm, sizeof(proc_comm_t));
914 if (read_result <= 0) {
915 ERROR(RDT_PLUGIN ": Comm file skipped. Read result: %d", read_result);
919 /* Try to find comm in input procs array (proc_pids_array has same names) */
920 for (size_t i = 0; i < procs_names_array_size; ++i) {
921 if (0 == strncmp(comm, (*proc_pids_array)[i].proccess_name,
922 STATIC_ARRAY_SIZE(comm)))
923 pids_list_add_pid(&((*proc_pids_array)[i].pids), pid);
927 int close_result = closedir(proc_dir);
928 if (0 != close_result) {
929 ERROR(RDT_PLUGIN ": failed to close %s directory, error: %d", procfs_path,
936 static void rdt_pqos_log(void *context, const size_t size, const char *msg) {
937 DEBUG(RDT_PLUGIN ": %s", msg);
940 static int rdt_preinit(void) {
944 /* already initialized if config callback was called before init callback */
948 g_rdt = calloc(1, sizeof(*g_rdt));
950 ERROR(RDT_PLUGIN ": Failed to allocate memory for rdt context.");
954 struct pqos_config pqos = {.fd_log = -1,
955 .callback_log = rdt_pqos_log,
959 ret = pqos_init(&pqos);
960 if (ret != PQOS_RETVAL_OK) {
961 ERROR(RDT_PLUGIN ": Error initializing PQoS library!");
962 goto rdt_preinit_error1;
965 ret = pqos_cap_get(&g_rdt->pqos_cap, &g_rdt->pqos_cpu);
966 if (ret != PQOS_RETVAL_OK) {
967 ERROR(RDT_PLUGIN ": Error retrieving PQoS capabilities.");
968 goto rdt_preinit_error2;
971 ret = pqos_cap_get_type(g_rdt->pqos_cap, PQOS_CAP_TYPE_MON, &g_rdt->cap_mon);
972 if (ret == PQOS_RETVAL_PARAM) {
973 ERROR(RDT_PLUGIN ": Error retrieving monitoring capabilities.");
974 goto rdt_preinit_error2;
977 if (g_rdt->cap_mon == NULL) {
980 ": Monitoring capability not detected. Nothing to do for the plugin.");
981 goto rdt_preinit_error2;
984 /* Reset pqos monitoring groups registers */
999 static int rdt_config(oconfig_item_t *ci) {
1000 if (rdt_preinit() != 0) {
1001 g_state = CONFIGURATION_ERROR;
1002 /* if we return -1 at this point collectd
1003 reports a failure in configuration and
1009 for (int i = 0; i < ci->children_num; i++) {
1010 oconfig_item_t *child = ci->children + i;
1012 if (strncasecmp("Cores", child->key, (size_t)strlen("Cores")) == 0) {
1013 if (rdt_config_cgroups(child) != 0) {
1014 g_state = CONFIGURATION_ERROR;
1015 /* if we return -1 at this point collectd
1016 reports a failure in configuration and
1024 #endif /* COLLECT_DEBUG */
1025 } else if (strncasecmp("Processes", child->key,
1026 (size_t)strlen("Processes")) == 0) {
1027 if (rdt_config_ngroups(child) != 0) {
1028 g_state = CONFIGURATION_ERROR;
1029 /* if we return -1 at this point collectd
1030 reports a failure in configuration and
1038 #endif /* COLLECT_DEBUG */
1040 ERROR(RDT_PLUGIN ": Unknown configuration parameter \"%s\".", child->key);
1047 static void rdt_submit_derive(const char *cgroup, const char *type,
1048 const char *type_instance, derive_t value) {
1049 value_list_t vl = VALUE_LIST_INIT;
1051 vl.values = &(value_t){.derive = value};
1054 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
1055 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
1056 sstrncpy(vl.type, type, sizeof(vl.type));
1058 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
1060 plugin_dispatch_values(&vl);
1063 static void rdt_submit_gauge(const char *cgroup, const char *type,
1064 const char *type_instance, gauge_t value) {
1065 value_list_t vl = VALUE_LIST_INIT;
1067 vl.values = &(value_t){.gauge = value};
1070 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
1071 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
1072 sstrncpy(vl.type, type, sizeof(vl.type));
1074 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
1076 plugin_dispatch_values(&vl);
1079 static int rdt_pid_list_diff(pids_list_t *prev, pids_list_t *curr,
1080 pids_list_t **added, size_t *added_num,
1081 pids_list_t **removed, size_t *removed_num) {
1082 assert(prev || curr);
1087 /* append all PIDs from curr to added*/
1088 return pids_list_add_pids_list(added, curr, added_num);
1089 } else if (NULL == curr) {
1090 /* append all PIDs from prev to removed*/
1091 return pids_list_add_pids_list(removed, prev, removed_num);
1094 pids_list_t *item = prev;
1095 while (item != NULL) {
1096 if (0 == pids_list_contains_pid(curr, item->pid)) {
1097 pids_list_add_pid(removed, item->pid);
1104 while (item != NULL) {
1105 if (0 == pids_list_contains_pid(prev, item->pid)) {
1106 pids_list_add_pid(added, item->pid);
1115 static int rdt_refresh_ngroup(rdt_name_group_t *ngroup) {
1119 DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group.",
1122 proc_pids_t *pids_array_prev = ngroup->proc_pids_array;
1123 proc_pids_t *pids_array_curr = NULL;
1126 fetch_pids_for_procs(RDT_PROC_PATH, (const char **)ngroup->names,
1127 ngroup->num_names, &pids_array_curr);
1129 if (0 != fetch_result) {
1130 ERROR(RDT_PLUGIN ": rdt_refresh_ngroup: failed to fetch PIDs for \'%s\' "
1131 "process names group.",
1133 return fetch_result;
1136 if (NULL == pids_array_prev) {
1137 /*no PIDs info yet, just save current one for next iteration*/
1138 ngroup->proc_pids_array = pids_array_curr;
1142 pids_list_t *added = NULL;
1143 size_t added_num = 0;
1145 pids_list_t *removed = NULL;
1146 size_t removed_num = 0;
1148 for (size_t i = 0; i < ngroup->num_names; ++i) {
1149 if (NULL == pids_array_prev[i].pids && NULL == pids_array_curr[i].pids)
1151 rdt_pid_list_diff(pids_array_prev[i].pids, pids_array_curr[i].pids, &added,
1152 &added_num, &removed, &removed_num);
1155 DEBUG(RDT_PLUGIN ": rdt_refresh_ngroup: \'%s\' process names group, added: "
1157 ngroup->desc, (unsigned)added_num, (unsigned)removed_num);
1159 if (added_num != 0 || removed_num != 0) {
1160 ngroup->proc_pids_array = pids_array_curr;
1162 /*call pqos add and remove functions here*/
1165 /*free prev PID lists, only if new was saved in ngroup struct*/
1166 if (pids_array_prev && pids_array_prev != ngroup->proc_pids_array) {
1167 for (size_t i = 0; i < ngroup->num_names; ++i) {
1168 if (NULL == pids_array_prev[i].pids)
1170 pids_list_free(pids_array_prev[i].pids);
1173 sfree(pids_array_prev);
1179 static int rdt_read(__attribute__((unused)) user_data_t *ud) {
1182 if (g_rdt == NULL) {
1183 ERROR(RDT_PLUGIN ": rdt_read: plugin not initialized.");
1187 ret = pqos_mon_poll(&g_rdt->pcgroups[0], (unsigned)g_rdt->cores.num_cgroups);
1188 if (ret != PQOS_RETVAL_OK) {
1189 ERROR(RDT_PLUGIN ": Failed to poll monitoring data.");
1193 for (size_t i = 0; i < g_rdt->num_ngroups; i++)
1194 rdt_refresh_ngroup(&g_rdt->ngroups[i]);
1198 #endif /* COLLECT_DEBUG */
1200 for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) {
1201 core_group_t *cgroup = g_rdt->cores.cgroups + i;
1202 enum pqos_mon_event mbm_events =
1203 (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW |
1204 PQOS_MON_EVENT_RMEM_BW);
1206 const struct pqos_event_values *pv = &g_rdt->pcgroups[i]->values;
1208 /* Submit only monitored events data */
1210 if (g_rdt->events[i] & PQOS_MON_EVENT_L3_OCCUP)
1211 rdt_submit_gauge(cgroup->desc, "bytes", "llc", pv->llc);
1213 if (g_rdt->events[i] & PQOS_PERF_EVENT_IPC)
1214 rdt_submit_gauge(cgroup->desc, "ipc", NULL, pv->ipc);
1216 if (g_rdt->events[i] & mbm_events) {
1217 rdt_submit_derive(cgroup->desc, "memory_bandwidth", "local",
1218 pv->mbm_local_delta);
1219 rdt_submit_derive(cgroup->desc, "memory_bandwidth", "remote",
1220 pv->mbm_remote_delta);
1227 static int rdt_init(void) {
1230 if (g_state == CONFIGURATION_ERROR)
1233 ret = rdt_preinit();
1237 /* Start monitoring */
1238 for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) {
1239 core_group_t *cg = g_rdt->cores.cgroups + i;
1241 ret = pqos_mon_start(cg->num_cores, cg->cores, g_rdt->events[i],
1242 (void *)cg->desc, g_rdt->pcgroups[i]);
1244 if (ret != PQOS_RETVAL_OK)
1245 ERROR(RDT_PLUGIN ": Error starting monitoring group %s (pqos status=%d)",
1252 static int rdt_shutdown(void) {
1255 DEBUG(RDT_PLUGIN ": rdt_shutdown.");
1260 /* Stop monitoring */
1261 for (size_t i = 0; i < g_rdt->cores.num_cgroups; i++) {
1262 pqos_mon_stop(g_rdt->pcgroups[i]);
1266 if (ret != PQOS_RETVAL_OK)
1267 ERROR(RDT_PLUGIN ": Error shutting down PQoS library.");
1275 void module_register(void) {
1276 plugin_register_init(RDT_PLUGIN, rdt_init);
1277 plugin_register_complex_config(RDT_PLUGIN, rdt_config);
1278 plugin_register_complex_read(NULL, RDT_PLUGIN, rdt_read, 0, NULL);
1279 plugin_register_shutdown(RDT_PLUGIN, rdt_shutdown);