2 * collectd - src/intel_rdt.c
4 * Copyright(c) 2016-2018 Intel Corporation. All rights reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Serhiy Pshyk <serhiyx.pshyk@intel.com>
26 * Starzyk, Mateusz <mateuszx.starzyk@intel.com>
30 #include "utils/common/common.h"
31 #include "utils/config_cores/config_cores.h"
/* Plugin name; used as log-message prefix and when registering callbacks. */
#define RDT_PLUGIN "intel_rdt"

/* Upper bounds; RDT_MAX_CORES sizes the per-group arrays of the context. */
#define RDT_MAX_SOCKETS 8
#define RDT_MAX_SOCKET_CORES 64
#define RDT_MAX_CORES (RDT_MAX_SOCKET_CORES * RDT_MAX_SOCKETS)

/*
 * Process name inside comm file is limited to 16 chars.
 * More info here: http://man7.org/linux/man-pages/man5/proc.5.html
 */
#define RDT_MAX_PROC_COMM_LENGTH 16
46 * In future: Start monitoring for PID group. For perf grouping will be added.
47 * Currently: Start monitoring only for the first PID.
49 __attribute__((unused)) static int
50 pqos_mon_start_pids(const unsigned num_pids, const pid_t *pids,
51 const enum pqos_mon_event event, void *context,
52 struct pqos_mon_data *group) {
56 return pqos_mon_start_pid(pids[0], event, context, group);
60 * In future: Add PIDs to the monitoring group. Supported for resctrl monitoring
62 * Currently: Does nothing.
64 __attribute__((unused)) static int
65 pqos_mon_add_pids(const unsigned num_pids, const pid_t *pids, void *context,
66 struct pqos_mon_data *group) {
67 return PQOS_RETVAL_OK;
71 * In future: Remove PIDs from the monitoring group. Supported for resctrl
73 * Currently: Does nothing.
75 __attribute__((unused)) static int
76 pqos_mon_remove_pids(const unsigned num_pids, const pid_t *pids, void *context,
77 struct pqos_mon_data *group) {
78 return PQOS_RETVAL_OK;
87 core_groups_list_t cores;
88 enum pqos_mon_event events[RDT_MAX_CORES];
89 struct pqos_mon_data *pgroups[RDT_MAX_CORES];
91 const struct pqos_cpuinfo *pqos_cpu;
92 const struct pqos_cap *pqos_cap;
93 const struct pqos_capability *cap_mon;
95 typedef struct rdt_ctx_s rdt_ctx_t;
97 static rdt_ctx_t *g_rdt;
99 static rdt_config_status g_state = UNKNOWN;
102 static void rdt_dump_cgroups(void) {
103 char cores[RDT_MAX_CORES * 4];
108 DEBUG(RDT_PLUGIN ": Core Groups Dump");
109 DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->num_groups);
111 for (size_t i = 0; i < g_rdt->num_groups; i++) {
112 core_group_t *cgroup = g_rdt->cores.cgroups + i;
114 memset(cores, 0, sizeof(cores));
115 for (size_t j = 0; j < cgroup->num_cores; j++) {
116 snprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d",
120 DEBUG(RDT_PLUGIN ": group[%zu]:", i);
121 DEBUG(RDT_PLUGIN ": description: %s", cgroup->desc);
122 DEBUG(RDT_PLUGIN ": cores: %s", cores);
123 DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->events[i]);
/* Converts a byte count to kibibytes (1 KB = 1024 bytes). */
static inline double bytes_to_kb(const double bytes) {
  const double bytes_per_kb = 1024.0;

  return bytes / bytes_per_kb;
}
/* Converts a byte count to mebibytes (1 MB = 1024 * 1024 bytes). */
static inline double bytes_to_mb(const double bytes) {
  return bytes / (1024.0 * 1024.0);
}
135 static void rdt_dump_data(void) {
137 * CORE - monitored group of cores
138 * RMID - Resource Monitoring ID associated with the monitored group
139 * LLC - last level cache occupancy
140 * MBL - local memory bandwidth
141 * MBR - remote memory bandwidth
143 DEBUG(" CORE RMID LLC[KB] MBL[MB] MBR[MB]");
144 for (size_t i = 0; i < g_rdt->num_groups; i++) {
145 const struct pqos_event_values *pv = &g_rdt->pgroups[i]->values;
147 double llc = bytes_to_kb(pv->llc);
148 double mbr = bytes_to_mb(pv->mbm_remote_delta);
149 double mbl = bytes_to_mb(pv->mbm_local_delta);
151 DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdt->cores.cgroups[i].desc,
152 g_rdt->pgroups[i]->poll_ctx[0].rmid, llc, mbl, mbr);
155 #endif /* COLLECT_DEBUG */
157 static void rdt_free_cgroups(void) {
158 config_cores_cleanup(&g_rdt->cores);
159 for (int i = 0; i < RDT_MAX_CORES; i++) {
160 sfree(g_rdt->pgroups[i]);
164 static int rdt_default_cgroups(void) {
165 unsigned num_cores = g_rdt->pqos_cpu->num_cores;
167 g_rdt->cores.cgroups = calloc(num_cores, sizeof(*g_rdt->cores.cgroups));
168 if (g_rdt->cores.cgroups == NULL) {
169 ERROR(RDT_PLUGIN ": Error allocating core groups array");
172 g_rdt->cores.num_cgroups = num_cores;
174 /* configure each core in separate group */
175 for (unsigned i = 0; i < num_cores; i++) {
176 core_group_t *cgroup = g_rdt->cores.cgroups + i;
177 char desc[DATA_MAX_NAME_LEN];
179 /* set core group info */
180 cgroup->cores = calloc(1, sizeof(*cgroup->cores));
181 if (cgroup->cores == NULL) {
182 ERROR(RDT_PLUGIN ": Error allocating cores array");
186 cgroup->num_cores = 1;
187 cgroup->cores[0] = i;
189 snprintf(desc, sizeof(desc), "%d", g_rdt->pqos_cpu->cores[i].lcore);
190 cgroup->desc = strdup(desc);
191 if (cgroup->desc == NULL) {
192 ERROR(RDT_PLUGIN ": Error allocating core group description");
201 static int rdt_is_core_id_valid(unsigned int core_id) {
203 for (unsigned int i = 0; i < g_rdt->pqos_cpu->num_cores; i++)
204 if (core_id == g_rdt->pqos_cpu->cores[i].lcore)
210 static int rdt_config_cgroups(oconfig_item_t *item) {
212 enum pqos_mon_event events = 0;
214 if (config_cores_parse(item, &g_rdt->cores) < 0) {
216 ERROR(RDT_PLUGIN ": Error parsing core groups configuration.");
219 n = g_rdt->cores.num_cgroups;
221 /* validate configured core id values */
222 for (size_t group_idx = 0; group_idx < n; group_idx++) {
223 core_group_t *cgroup = g_rdt->cores.cgroups + group_idx;
224 for (size_t core_idx = 0; core_idx < cgroup->num_cores; core_idx++) {
225 if (!rdt_is_core_id_valid(cgroup->cores[core_idx])) {
226 ERROR(RDT_PLUGIN ": Core group '%s' contains invalid core id '%u'",
227 cgroup->desc, cgroup->cores[core_idx]);
235 /* create default core groups if "Cores" config option is empty */
236 int ret = rdt_default_cgroups();
239 ERROR(RDT_PLUGIN ": Error creating default core groups configuration.");
244 ": No core groups configured. Default core groups created.");
247 /* Get all available events on this platform */
248 for (unsigned int i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++)
249 events |= g_rdt->cap_mon->u.mon->events[i].type;
251 events &= ~(PQOS_PERF_EVENT_LLC_MISS);
253 DEBUG(RDT_PLUGIN ": Number of cores in the system: %u",
254 g_rdt->pqos_cpu->num_cores);
255 DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events);
257 g_rdt->num_groups = n;
258 for (size_t i = 0; i < n; i++) {
259 for (size_t j = 0; j < i; j++) {
261 found = config_cores_cmp_cgroups(&g_rdt->cores.cgroups[j],
262 &g_rdt->cores.cgroups[i]);
265 ERROR(RDT_PLUGIN ": Cannot monitor same cores in different groups.");
270 g_rdt->events[i] = events;
271 g_rdt->pgroups[i] = calloc(1, sizeof(*g_rdt->pgroups[i]));
272 if (g_rdt->pgroups[i] == NULL) {
274 ERROR(RDT_PLUGIN ": Failed to allocate memory for monitoring data.");
282 /* Helper typedef for process name array
283 * Extra 1 char is added for string null termination.
285 typedef char proc_comm_t[RDT_MAX_PROC_COMM_LENGTH + 1];
/* Linked one-way list of pids. */
typedef struct pids_list_s {
  pid_t pid;                /* process id held by this element */
  struct pids_list_s *next; /* following element, NULL at the tail */
} pids_list_t;
293 /* Holds process name and list of pids assigned to that name */
294 typedef struct proc_pids_s {
295 proc_comm_t proccess_name;
304 * Adds pid at the end of the pids list.
305 * Allocates memory for new pid element, it is up to user to free it.
308 * `list' Head of target pids_list.
309 * `pid' Pid to be added.
312 * On success, returns 0.
313 * -1 on memory allocation error.
315 static int pids_list_add_pid(pids_list_t **list, const pid_t pid) {
316 pids_list_t *new_element = calloc(1, sizeof(*new_element));
318 if (new_element == NULL) {
319 ERROR(RDT_PLUGIN ": Alloc error\n");
322 new_element->pid = pid;
323 new_element->next = NULL;
325 pids_list_t **current = list;
326 while (*current != NULL) {
327 current = &((*current)->next);
329 *current = new_element;
338 * Reads process name from given pid directory.
339 * Strips new-line character (\n).
342 * `procfs_path` Path to systems proc directory (e.g. /proc)
343 * `pid_entry' Dirent for PID directory
344 * `name' Output buffer for process name, recommended proc_comm.
345 * `out_size' Output buffer size, recommended sizeof(proc_comm)
348 * On success, the number of read bytes (includes stripped \n).
349 * -1 on file open error
351 static int read_proc_name(const char *procfs_path,
352 const struct dirent *pid_entry, char *name,
353 const size_t out_size) {
358 memset(name, 0, out_size);
360 const char *comm_file_name = "comm";
362 char *path = ssnprintf_alloc("%s/%s/%s", procfs_path, pid_entry->d_name,
365 FILE *f = fopen(path, "r");
367 ERROR(RDT_PLUGIN ": Failed to open comm file, error: %d\n", errno);
371 size_t read_length = fread(name, sizeof(char), out_size, f);
374 /* strip new line ending */
375 char *newline = strchr(name, '\n');
/*
 * NAME
 *   get_pid_number
 *
 * DESCRIPTION
 *   Gets pid number for given /proc/pid directory entry or
 *   returns error if input directory does not hold PID information.
 *   The entry name must consist of decimal digits only and fit in pid_t.
 *
 * PARAMETERS
 *   `entry'    Dirent for PID directory
 *   `pid'      PID number to be filled; written only on success
 *
 * RETURN VALUE
 *   0 on success. Negative number on error:
 *   -1: given entry is not a directory (or an argument is NULL)
 *   -2: PID conversion error
 */
static int get_pid_number(struct dirent *entry, pid_t *pid) {
  char *tmp_end; /* used for strtoul error check */

  if (pid == NULL || entry == NULL)
    return -1;

  if (entry->d_type != DT_DIR)
    return -1;

  /* strtoul() skips leading blanks, accepts a sign and turns an empty string
   * into 0; a PID directory name has to start with a digit */
  if (entry->d_name[0] < '0' || entry->d_name[0] > '9')
    return -2;

  /* trying to get pid number from directory name */
  errno = 0;
  unsigned long value = strtoul(entry->d_name, &tmp_end, 10);
  if (*tmp_end != '\0' || errno == ERANGE)
    return -2; /* conversion failed, not proc-pid */

  /* reject values that do not survive narrowing to pid_t */
  pid_t converted = (pid_t)value;
  if (converted < 0 || (unsigned long)converted != value)
    return -2;

  /* all checks passed, marking as success */
  *pid = converted;
  return 0;
}
420 * fetch_pids_for_procs
423 * Finds PIDs matching given process's names.
424 * Searches all PID directories in /proc fs and
425 * allocates memory for proc_pids structs, it is up to user to free it.
426 * Output array will have same element count as input array.
429 * `procfs_path' Path to systems proc directory (e.g. /proc)
430 * `procs' Array of null-terminated strings with
431 * process' names to search for
432 * `procs_size' procs array element count
433 * `proc_pids_array' Address of pointer, under which new
434 * array of proc_pids will be allocated. Must be NULL.
437 * 0 on success. Negative number on error:
438 * -1: could not open /proc dir
440 __attribute__((unused)) /* TODO: remove this attribute when PID monitoring is
443 fetch_pids_for_procs(const char *procfs_path, const char **procs_names_array,
444 const size_t procs_names_array_size,
445 proc_pids_t **proc_pids_array) {
447 assert(procs_names_array);
448 assert(procs_names_array_size);
449 assert(proc_pids_array);
450 assert(NULL == *proc_pids_array);
452 DIR *proc_dir = opendir(procfs_path);
453 if (proc_dir == NULL) {
454 ERROR(RDT_PLUGIN ": Could not open %s directory, error: %d", procfs_path,
459 /* Copy procs names to output array. Initialize pids list with NULL value. */
461 calloc(procs_names_array_size, sizeof(**proc_pids_array));
462 for (size_t i = 0; i < procs_names_array_size; ++i) {
463 sstrncpy((*proc_pids_array)[i].proccess_name, procs_names_array[i],
464 STATIC_ARRAY_SIZE((*proc_pids_array)[i].proccess_name));
465 (*proc_pids_array)[i].pids = NULL;
468 /* Go through procfs and find PIDS and their comms */
469 struct dirent *entry;
470 while ((entry = readdir(proc_dir)) != NULL) {
473 int pid_conversion = get_pid_number(entry, &pid);
474 if (pid_conversion < 0)
479 read_proc_name(procfs_path, entry, comm, sizeof(proc_comm_t));
480 if (read_result <= 0) {
481 ERROR(RDT_PLUGIN ": Comm file skipped. Read result: %d", read_result);
485 /* Try to find comm in input procs array (proc_pids_array has same names) */
486 for (size_t i = 0; i < procs_names_array_size; ++i) {
487 if (0 == strncmp(comm, (*proc_pids_array)[i].proccess_name,
488 STATIC_ARRAY_SIZE(comm)))
489 pids_list_add_pid(&((*proc_pids_array)[i].pids), pid);
493 int close_result = closedir(proc_dir);
494 if (0 != close_result) {
495 ERROR(RDT_PLUGIN ": failed to close %s directory, error: %d", procfs_path,
502 static void rdt_pqos_log(void *context, const size_t size, const char *msg) {
503 DEBUG(RDT_PLUGIN ": %s", msg);
506 static int rdt_preinit(void) {
510 /* already initialized if config callback was called before init callback */
514 g_rdt = calloc(1, sizeof(*g_rdt));
516 ERROR(RDT_PLUGIN ": Failed to allocate memory for rdt context.");
520 struct pqos_config pqos = {.fd_log = -1,
521 .callback_log = rdt_pqos_log,
525 ret = pqos_init(&pqos);
526 if (ret != PQOS_RETVAL_OK) {
527 ERROR(RDT_PLUGIN ": Error initializing PQoS library!");
528 goto rdt_preinit_error1;
531 ret = pqos_cap_get(&g_rdt->pqos_cap, &g_rdt->pqos_cpu);
532 if (ret != PQOS_RETVAL_OK) {
533 ERROR(RDT_PLUGIN ": Error retrieving PQoS capabilities.");
534 goto rdt_preinit_error2;
537 ret = pqos_cap_get_type(g_rdt->pqos_cap, PQOS_CAP_TYPE_MON, &g_rdt->cap_mon);
538 if (ret == PQOS_RETVAL_PARAM) {
539 ERROR(RDT_PLUGIN ": Error retrieving monitoring capabilities.");
540 goto rdt_preinit_error2;
543 if (g_rdt->cap_mon == NULL) {
546 ": Monitoring capability not detected. Nothing to do for the plugin.");
547 goto rdt_preinit_error2;
550 /* Reset pqos monitoring groups registers */
565 static int rdt_config(oconfig_item_t *ci) {
566 if (rdt_preinit() != 0) {
567 g_state = CONFIGURATION_ERROR;
568 /* if we return -1 at this point collectd
569 reports a failure in configuration and
575 for (int i = 0; i < ci->children_num; i++) {
576 oconfig_item_t *child = ci->children + i;
578 if (strcasecmp("Cores", child->key) == 0) {
579 if (rdt_config_cgroups(child) != 0) {
580 g_state = CONFIGURATION_ERROR;
581 /* if we return -1 at this point collectd
582 reports a failure in configuration and
590 #endif /* COLLECT_DEBUG */
592 ERROR(RDT_PLUGIN ": Unknown configuration parameter \"%s\".", child->key);
599 static void rdt_submit_derive(const char *cgroup, const char *type,
600 const char *type_instance, derive_t value) {
601 value_list_t vl = VALUE_LIST_INIT;
603 vl.values = &(value_t){.derive = value};
606 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
607 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
608 sstrncpy(vl.type, type, sizeof(vl.type));
610 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
612 plugin_dispatch_values(&vl);
615 static void rdt_submit_gauge(const char *cgroup, const char *type,
616 const char *type_instance, gauge_t value) {
617 value_list_t vl = VALUE_LIST_INIT;
619 vl.values = &(value_t){.gauge = value};
622 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
623 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
624 sstrncpy(vl.type, type, sizeof(vl.type));
626 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
628 plugin_dispatch_values(&vl);
631 static int rdt_read(__attribute__((unused)) user_data_t *ud) {
635 ERROR(RDT_PLUGIN ": rdt_read: plugin not initialized.");
639 ret = pqos_mon_poll(&g_rdt->pgroups[0], (unsigned)g_rdt->num_groups);
640 if (ret != PQOS_RETVAL_OK) {
641 ERROR(RDT_PLUGIN ": Failed to poll monitoring data.");
647 #endif /* COLLECT_DEBUG */
649 for (size_t i = 0; i < g_rdt->num_groups; i++) {
650 core_group_t *cgroup = g_rdt->cores.cgroups + i;
652 enum pqos_mon_event mbm_events =
653 (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW |
654 PQOS_MON_EVENT_RMEM_BW);
656 const struct pqos_event_values *pv = &g_rdt->pgroups[i]->values;
658 /* Submit only monitored events data */
660 if (g_rdt->events[i] & PQOS_MON_EVENT_L3_OCCUP)
661 rdt_submit_gauge(cgroup->desc, "bytes", "llc", pv->llc);
663 if (g_rdt->events[i] & PQOS_PERF_EVENT_IPC)
664 rdt_submit_gauge(cgroup->desc, "ipc", NULL, pv->ipc);
666 if (g_rdt->events[i] & mbm_events) {
667 rdt_submit_derive(cgroup->desc, "memory_bandwidth", "local",
668 pv->mbm_local_delta);
669 rdt_submit_derive(cgroup->desc, "memory_bandwidth", "remote",
670 pv->mbm_remote_delta);
677 static int rdt_init(void) {
680 if (g_state == CONFIGURATION_ERROR)
687 /* Start monitoring */
688 for (size_t i = 0; i < g_rdt->num_groups; i++) {
689 core_group_t *cg = g_rdt->cores.cgroups + i;
691 ret = pqos_mon_start(cg->num_cores, cg->cores, g_rdt->events[i],
692 (void *)cg->desc, g_rdt->pgroups[i]);
694 if (ret != PQOS_RETVAL_OK)
695 ERROR(RDT_PLUGIN ": Error starting monitoring group %s (pqos status=%d)",
702 static int rdt_shutdown(void) {
705 DEBUG(RDT_PLUGIN ": rdt_shutdown.");
710 /* Stop monitoring */
711 for (size_t i = 0; i < g_rdt->num_groups; i++) {
712 pqos_mon_stop(g_rdt->pgroups[i]);
716 if (ret != PQOS_RETVAL_OK)
717 ERROR(RDT_PLUGIN ": Error shutting down PQoS library.");
725 void module_register(void) {
726 plugin_register_init(RDT_PLUGIN, rdt_init);
727 plugin_register_complex_config(RDT_PLUGIN, rdt_config);
728 plugin_register_complex_read(NULL, RDT_PLUGIN, rdt_read, 0, NULL);
729 plugin_register_shutdown(RDT_PLUGIN, rdt_shutdown);