2 * collectd - src/intel_rdt.c
4 * Copyright(c) 2016-2018 Intel Corporation. All rights reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Serhiy Pshyk <serhiyx.pshyk@intel.com>
29 #include "utils/common/common.h"
30 #include "utils/config_cores/config_cores.h"
/* Plugin name: used as the prefix of every log message in this file, as the
 * "plugin" field of dispatched values and as the name under which the
 * callbacks are registered. */
34 #define RDT_PLUGIN "intel_rdt"
/* Sizing limits. RDT_MAX_CORES is the product of the two values below and is
 * the capacity of the events[] and pgroups[] arrays of the plugin context. */
36 #define RDT_MAX_SOCKETS 8
37 #define RDT_MAX_SOCKET_CORES 64
38 #define RDT_MAX_CORES (RDT_MAX_SOCKET_CORES * RDT_MAX_SOCKETS)
/* Core groups to monitor; filled either by config_cores_parse() or by
 * rdt_default_cgroups(). */
46 core_groups_list_t cores;
/* Event mask of each core group, indexed by group. */
47 enum pqos_mon_event events[RDT_MAX_CORES];
/* Monitoring data of each core group, indexed by group; the entries are
 * allocated in rdt_config_cgroups() and released in rdt_free_cgroups(). */
48 struct pqos_mon_data *pgroups[RDT_MAX_CORES];
/* CPU information and capabilities as filled in by pqos_cap_get(). */
50 const struct pqos_cpuinfo *pqos_cpu;
51 const struct pqos_cap *pqos_cap;
/* Monitoring capability as filled in by
 * pqos_cap_get_type(..., PQOS_CAP_TYPE_MON, ...). */
52 const struct pqos_capability *cap_mon;
54 typedef struct rdt_ctx_s rdt_ctx_t;
/* Plugin context; allocated with calloc() in rdt_preinit(). */
56 static rdt_ctx_t *g_rdt;
/* Configuration status; rdt_config() sets it to CONFIGURATION_ERROR on
 * failure and rdt_init() checks it before starting monitoring. */
58 static rdt_config_status g_state = UNKNOWN;
/*
 * Debug helper: writes the configured core groups to the log through DEBUG().
 * Logs the number of groups, then for every group its index, description,
 * list of cores and event mask.
 */
61 static void rdt_dump_cgroups(void) {
62 char cores[RDT_MAX_CORES * 4];
67 DEBUG(RDT_PLUGIN ": Core Groups Dump");
68 DEBUG(RDT_PLUGIN ": groups count: %" PRIsz, g_rdt->num_groups);
70 for (size_t i = 0; i < g_rdt->num_groups; i++) {
71 core_group_t *cgroup = g_rdt->cores.cgroups + i;
/* Start from an empty string for every group, then append one " %d"
 * formatted entry per core of the group. */
73 memset(cores, 0, sizeof(cores));
74 for (size_t j = 0; j < cgroup->num_cores; j++) {
/* Each entry is written at the current end of the string; the size passed
 * to snprintf() is the remaining space minus one byte. */
75 snprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d",
79 DEBUG(RDT_PLUGIN ": group[%zu]:", i);
80 DEBUG(RDT_PLUGIN ": description: %s", cgroup->desc);
81 DEBUG(RDT_PLUGIN ": cores: %s", cores);
82 DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->events[i]);
/* Converts a byte count to kilobytes by dividing by 1024. */
88 static inline double bytes_to_kb(const double bytes) { return bytes / 1024.0; }
/* Converts a byte count to megabytes by dividing by 1024 * 1024. */
90 static inline double bytes_to_mb(const double bytes) {
91 return bytes / (1024.0 * 1024.0);
/*
 * Debug helper: writes the current monitoring values to the log through
 * DEBUG(). After a table header, one row is logged per core group with the
 * group description, the RMID of the group's first poll context, the LLC
 * value converted to KB and the local and remote memory bandwidth deltas
 * converted to MB.
 */
94 static void rdt_dump_data(void) {
96 * CORE - monitored group of cores
97 * RMID - Resource Monitoring ID associated with the monitored group
98 * LLC - last level cache occupancy
99 * MBL - local memory bandwidth
100 * MBR - remote memory bandwidth
102 DEBUG(" CORE RMID LLC[KB] MBL[MB] MBR[MB]");
103 for (size_t i = 0; i < g_rdt->num_groups; i++) {
104 const struct pqos_event_values *pv = &g_rdt->pgroups[i]->values;
/* Scale the raw values to the units announced in the table header. */
106 double llc = bytes_to_kb(pv->llc);
107 double mbr = bytes_to_mb(pv->mbm_remote_delta);
108 double mbl = bytes_to_mb(pv->mbm_local_delta);
110 DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdt->cores.cgroups[i].desc,
111 g_rdt->pgroups[i]->poll_ctx[0].rmid, llc, mbl, mbr);
114 #endif /* COLLECT_DEBUG */
/*
 * Releases the core group configuration held in the plugin context:
 * hands g_rdt->cores to config_cores_cleanup() and passes every slot of
 * g_rdt->pgroups[] to sfree().
 */
116 static void rdt_free_cgroups(void) {
117 config_cores_cleanup(&g_rdt->cores);
/* Walks the whole array (RDT_MAX_CORES slots), not only the first
 * num_groups entries. */
118 for (int i = 0; i < RDT_MAX_CORES; i++) {
119 sfree(g_rdt->pgroups[i]);
/*
 * Builds the default core group configuration: one group per core reported
 * in g_rdt->pqos_cpu. Each group holds exactly one core and its description
 * is the decimal text of that core's lcore value.
 *
 * Allocation failures of the group array, of a group's cores array or of a
 * group's description are reported through ERROR().
 */
123 static int rdt_default_cgroups(void) {
124 unsigned num_cores = g_rdt->pqos_cpu->num_cores;
126 g_rdt->cores.cgroups = calloc(num_cores, sizeof(*g_rdt->cores.cgroups));
127 if (g_rdt->cores.cgroups == NULL) {
128 ERROR(RDT_PLUGIN ": Error allocating core groups array");
131 g_rdt->cores.num_cgroups = num_cores;
133 /* configure each core in separate group */
134 for (unsigned i = 0; i < num_cores; i++) {
135 core_group_t *cgroup = g_rdt->cores.cgroups + i;
136 char desc[DATA_MAX_NAME_LEN];
138 /* set core group info */
139 cgroup->cores = calloc(1, sizeof(*cgroup->cores));
140 if (cgroup->cores == NULL) {
141 ERROR(RDT_PLUGIN ": Error allocating cores array");
145 cgroup->num_cores = 1;
/* NOTE(review): the stored core id is the loop index i, whereas the
 * description below is built from pqos_cpu->cores[i].lcore. The two agree
 * only when lcore == i for every core - confirm this is intended. */
146 cgroup->cores[0] = i;
148 snprintf(desc, sizeof(desc), "%d", g_rdt->pqos_cpu->cores[i].lcore);
/* The description is a heap copy made with strdup(). */
149 cgroup->desc = strdup(desc);
150 if (cgroup->desc == NULL) {
151 ERROR(RDT_PLUGIN ": Error allocating core group description");
/*
 * Looks for core_id among the lcore values of all cores listed in
 * g_rdt->pqos_cpu. The caller in rdt_config_cgroups() treats a zero result
 * as "invalid core id".
 */
160 static int rdt_is_core_id_valid(unsigned int core_id) {
162 for (unsigned int i = 0; i < g_rdt->pqos_cpu->num_cores; i++)
163 if (core_id == g_rdt->pqos_cpu->cores[i].lcore)
/*
 * Sets up the core groups and their monitoring data from the "Cores"
 * configuration item.
 *
 * Steps visible here:
 *  - parse the item into g_rdt->cores with config_cores_parse();
 *  - check every configured core id with rdt_is_core_id_valid();
 *  - create default core groups with rdt_default_cgroups() when the "Cores"
 *    option is empty;
 *  - collect the event types listed in the monitoring capability, without
 *    PQOS_PERF_EVENT_LLC_MISS;
 *  - for each group, compare it against all earlier groups with
 *    config_cores_cmp_cgroups() (the same cores must not be monitored in
 *    different groups), store the event mask and allocate the group's
 *    pqos_mon_data.
 *
 * The caller, rdt_config(), treats a non-zero result as a failure.
 */
169 static int rdt_config_cgroups(oconfig_item_t *item) {
171 enum pqos_mon_event events = 0;
173 if (config_cores_parse(item, &g_rdt->cores) < 0) {
175 ERROR(RDT_PLUGIN ": Error parsing core groups configuration.");
178 n = g_rdt->cores.num_cgroups;
180 /* validate configured core id values */
181 for (size_t group_idx = 0; group_idx < n; group_idx++) {
182 core_group_t *cgroup = g_rdt->cores.cgroups + group_idx;
183 for (size_t core_idx = 0; core_idx < cgroup->num_cores; core_idx++) {
184 if (!rdt_is_core_id_valid(cgroup->cores[core_idx])) {
185 ERROR(RDT_PLUGIN ": Core group '%s' contains invalid core id '%u'",
186 cgroup->desc, cgroup->cores[core_idx]);
194 /* create default core groups if "Cores" config option is empty */
195 int ret = rdt_default_cgroups();
198 ERROR(RDT_PLUGIN ": Error creating default core groups configuration.");
203 ": No core groups configured. Default core groups created.");
206 /* Get all available events on this platform */
207 for (unsigned int i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++)
208 events |= g_rdt->cap_mon->u.mon->events[i].type;
/* The LLC miss perf event is removed from the set of monitored events. */
210 events &= ~(PQOS_PERF_EVENT_LLC_MISS);
212 DEBUG(RDT_PLUGIN ": Number of cores in the system: %u",
213 g_rdt->pqos_cpu->num_cores);
214 DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events);
216 g_rdt->num_groups = n;
217 for (size_t i = 0; i < n; i++) {
/* Group i is compared with every group j that precedes it. */
218 for (size_t j = 0; j < i; j++) {
220 found = config_cores_cmp_cgroups(&g_rdt->cores.cgroups[j],
221 &g_rdt->cores.cgroups[i]);
224 ERROR(RDT_PLUGIN ": Cannot monitor same cores in different groups.");
/* Every group is monitored with the same event mask. */
229 g_rdt->events[i] = events;
230 g_rdt->pgroups[i] = calloc(1, sizeof(*g_rdt->pgroups[i]));
231 if (g_rdt->pgroups[i] == NULL) {
233 ERROR(RDT_PLUGIN ": Failed to allocate memory for monitoring data.");
/*
 * Log callback handed to the PQoS library through pqos_config.callback_log
 * in rdt_preinit(). Forwards msg to DEBUG(); context and size are not used
 * in the visible body.
 */
241 static void rdt_pqos_log(void *context, const size_t size, const char *msg) {
242 DEBUG(RDT_PLUGIN ": %s", msg);
/*
 * Prepares the plugin context and the PQoS library.
 *
 * Steps visible here: allocate the zero-initialized context g_rdt,
 * initialize the PQoS library with rdt_pqos_log() as log callback, fetch the
 * capability and CPU information, then fetch the monitoring capability.
 * Failures are logged with ERROR() and handled through the
 * rdt_preinit_error1 / rdt_preinit_error2 labels. rdt_config() treats a
 * non-zero result as a failure.
 */
245 static int rdt_preinit(void) {
249 /* already initialized if config callback was called before init callback */
253 g_rdt = calloc(1, sizeof(*g_rdt));
255 ERROR(RDT_PLUGIN ": Failed to allocate memory for rdt context.");
/* Library log output is routed to rdt_pqos_log() rather than to a file
 * descriptor (fd_log is -1). */
259 struct pqos_config pqos = {.fd_log = -1,
260 .callback_log = rdt_pqos_log,
264 ret = pqos_init(&pqos);
265 if (ret != PQOS_RETVAL_OK) {
266 ERROR(RDT_PLUGIN ": Error initializing PQoS library!");
267 goto rdt_preinit_error1;
270 ret = pqos_cap_get(&g_rdt->pqos_cap, &g_rdt->pqos_cpu);
271 if (ret != PQOS_RETVAL_OK) {
272 ERROR(RDT_PLUGIN ": Error retrieving PQoS capabilities.");
273 goto rdt_preinit_error2;
276 ret = pqos_cap_get_type(g_rdt->pqos_cap, PQOS_CAP_TYPE_MON, &g_rdt->cap_mon);
/* Only PQOS_RETVAL_PARAM is handled as an error here; a missing monitoring
 * capability is detected by the NULL check on cap_mon below. */
277 if (ret == PQOS_RETVAL_PARAM) {
278 ERROR(RDT_PLUGIN ": Error retrieving monitoring capabilities.");
279 goto rdt_preinit_error2;
282 if (g_rdt->cap_mon == NULL) {
285 ": Monitoring capability not detected. Nothing to do for the plugin.");
286 goto rdt_preinit_error2;
289 /* Reset pqos monitoring groups registers */
/*
 * Complex-config callback of the plugin (registered in module_register()).
 *
 * Runs rdt_preinit() first, then walks the children of ci: a child whose key
 * matches "Cores" (case-insensitively) is passed to rdt_config_cgroups();
 * the visible error message for other keys is "Unknown configuration
 * parameter". A failure of rdt_preinit() or rdt_config_cgroups() sets
 * g_state to CONFIGURATION_ERROR, which rdt_init() checks later.
 */
304 static int rdt_config(oconfig_item_t *ci) {
305 if (rdt_preinit() != 0) {
306 g_state = CONFIGURATION_ERROR;
307 /* if we return -1 at this point collectd
308 reports a failure in configuration and
314 for (int i = 0; i < ci->children_num; i++) {
315 oconfig_item_t *child = ci->children + i;
317 if (strcasecmp("Cores", child->key) == 0) {
318 if (rdt_config_cgroups(child) != 0) {
319 g_state = CONFIGURATION_ERROR;
320 /* if we return -1 at this point collectd
321 reports a failure in configuration and
329 #endif /* COLLECT_DEBUG */
331 ERROR(RDT_PLUGIN ": Unknown configuration parameter \"%s\".", child->key);
/*
 * Dispatches a single derive value for a core group.
 *
 * cgroup        - core group description, used as the plugin instance
 * type          - value type name
 * type_instance - type instance name
 * value         - derive value to submit
 *
 * The plugin field is always RDT_PLUGIN. The value list is handed to
 * plugin_dispatch_values().
 */
338 static void rdt_submit_derive(const char *cgroup, const char *type,
339 const char *type_instance, derive_t value) {
340 value_list_t vl = VALUE_LIST_INIT;
/* The value lives in a compound literal local to this function. */
342 vl.values = &(value_t){.derive = value};
345 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
346 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
347 sstrncpy(vl.type, type, sizeof(vl.type));
349 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
351 plugin_dispatch_values(&vl);
/*
 * Dispatches a single gauge value for a core group.
 *
 * cgroup        - core group description, used as the plugin instance
 * type          - value type name
 * type_instance - type instance name (rdt_read() passes NULL for "ipc")
 * value         - gauge value to submit
 *
 * The plugin field is always RDT_PLUGIN. The value list is handed to
 * plugin_dispatch_values().
 */
354 static void rdt_submit_gauge(const char *cgroup, const char *type,
355 const char *type_instance, gauge_t value) {
356 value_list_t vl = VALUE_LIST_INIT;
/* The value lives in a compound literal local to this function. */
358 vl.values = &(value_t){.gauge = value};
361 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
362 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
363 sstrncpy(vl.type, type, sizeof(vl.type));
365 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
367 plugin_dispatch_values(&vl);
/*
 * Read callback of the plugin (registered in module_register()); ud is
 * unused.
 *
 * Polls all monitoring groups with a single pqos_mon_poll() call and then,
 * for every core group, submits only the values whose events are set in the
 * group's event mask:
 *  - PQOS_MON_EVENT_L3_OCCUP: gauge, type "bytes", instance "llc";
 *  - PQOS_PERF_EVENT_IPC:     gauge, type "ipc", no instance;
 *  - any memory bandwidth event (local, total or remote): two derive values
 *    of type "memory_bandwidth", instances "local" and "remote".
 * The group description is used as the plugin instance of each value.
 */
370 static int rdt_read(__attribute__((unused)) user_data_t *ud) {
374 ERROR(RDT_PLUGIN ": rdt_read: plugin not initialized.");
378 ret = pqos_mon_poll(&g_rdt->pgroups[0], (unsigned)g_rdt->num_groups);
379 if (ret != PQOS_RETVAL_OK) {
380 ERROR(RDT_PLUGIN ": Failed to poll monitoring data.");
386 #endif /* COLLECT_DEBUG */
388 for (size_t i = 0; i < g_rdt->num_groups; i++) {
389 core_group_t *cgroup = g_rdt->cores.cgroups + i;
/* Mask covering all three memory bandwidth events. */
391 enum pqos_mon_event mbm_events =
392 (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW |
393 PQOS_MON_EVENT_RMEM_BW);
395 const struct pqos_event_values *pv = &g_rdt->pgroups[i]->values;
397 /* Submit only monitored events data */
399 if (g_rdt->events[i] & PQOS_MON_EVENT_L3_OCCUP)
400 rdt_submit_gauge(cgroup->desc, "bytes", "llc", pv->llc);
402 if (g_rdt->events[i] & PQOS_PERF_EVENT_IPC)
403 rdt_submit_gauge(cgroup->desc, "ipc", NULL, pv->ipc);
/* Both the local and the remote delta are submitted as soon as any one of
 * the memory bandwidth events is monitored. */
405 if (g_rdt->events[i] & mbm_events) {
406 rdt_submit_derive(cgroup->desc, "memory_bandwidth", "local",
407 pv->mbm_local_delta);
408 rdt_submit_derive(cgroup->desc, "memory_bandwidth", "remote",
409 pv->mbm_remote_delta);
/*
 * Init callback of the plugin (registered in module_register()).
 *
 * Checks g_state for CONFIGURATION_ERROR, then starts monitoring of every
 * configured core group with pqos_mon_start(), using the group's cores, its
 * event mask and its pqos_mon_data. The group description is passed as the
 * context pointer. A failure to start a group is logged with ERROR().
 */
416 static int rdt_init(void) {
419 if (g_state == CONFIGURATION_ERROR)
426 /* Start monitoring */
427 for (size_t i = 0; i < g_rdt->num_groups; i++) {
428 core_group_t *cg = g_rdt->cores.cgroups + i;
430 ret = pqos_mon_start(cg->num_cores, cg->cores, g_rdt->events[i],
431 (void *)cg->desc, g_rdt->pgroups[i]);
433 if (ret != PQOS_RETVAL_OK)
434 ERROR(RDT_PLUGIN ": Error starting monitoring group %s (pqos status=%d)",
/*
 * Shutdown callback of the plugin (registered in module_register()).
 *
 * Stops monitoring of every core group with pqos_mon_stop() and logs an
 * error when the library shutdown status is not PQOS_RETVAL_OK.
 */
441 static int rdt_shutdown(void) {
444 DEBUG(RDT_PLUGIN ": rdt_shutdown.");
449 /* Stop monitoring */
450 for (size_t i = 0; i < g_rdt->num_groups; i++) {
451 pqos_mon_stop(g_rdt->pgroups[i]);
455 if (ret != PQOS_RETVAL_OK)
456 ERROR(RDT_PLUGIN ": Error shutting down PQoS library.");
/*
 * Plugin entry point: registers the init, complex-config, complex-read and
 * shutdown callbacks of this file under the name RDT_PLUGIN.
 */
464 void module_register(void) {
465 plugin_register_init(RDT_PLUGIN, rdt_init);
466 plugin_register_complex_config(RDT_PLUGIN, rdt_config);
467 plugin_register_complex_read(NULL, RDT_PLUGIN, rdt_read, 0, NULL);
468 plugin_register_shutdown(RDT_PLUGIN, rdt_shutdown);