2 * collectd - src/dpdkstat.c
5 * Copyright(c) 2016 Intel Corporation. All rights reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 * this software and associated documentation files (the "Software"), to deal in
9 * the Software without restriction, including without limitation the rights to
10 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 * of the Software, and to permit persons to whom the Software is furnished to do
12 * so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in all
15 * copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Maryam Tahhan <maryam.tahhan@intel.com>
27 * Harry van Haaren <harry.van.haaren@intel.com>
31 #include "common.h" /* auxiliary functions */
32 #include "plugin.h" /* plugin_register_*, plugin_dispatch_values */
33 #include "utils_time.h"
45 #include <semaphore.h>
47 #include <sys/queue.h>
49 #include <sys/types.h>
56 #include <rte_config.h>
58 #include <rte_ethdev.h>
59 #include <rte_common.h>
60 #include <rte_debug.h>
61 #include <rte_malloc.h>
62 #include <rte_memory.h>
63 #include <rte_memzone.h>
64 #include <rte_launch.h>
65 #include <rte_tailq.h>
66 #include <rte_lcore.h>
67 #include <rte_per_lcore.h>
68 #include <rte_debug.h>
70 #include <rte_atomic.h>
71 #include <rte_branch_prediction.h>
72 #include <rte_string_fns.h>
/* Capacity, in bytes, of every option string stored in the shared config. */
#define DATA_MAX_NAME_LEN 64
/* Capacity, in bytes, of the buffers used to drain the helper's log pipe. */
#define DPDKSTAT_MAX_BUFFER_SIZE (4 * 4096)
/* Name passed to shm_open()/shm_unlink() for the plugin's SHM object. */
#define DPDK_SHM_NAME "dpdk_collectd_stats_shm"
/* Work the helper process is asked to perform when it is spawned. */
enum DPDK_HELPER_ACTION {
  /* Only count the enabled ports and the xstats each one exposes. */
  DPDK_HELPER_ACTION_COUNT_STATS = 0,
  /* Read the xstats of every enabled port into the shared memory. */
  DPDK_HELPER_ACTION_SEND_STATS = 1,
};
/* Life-cycle state of the helper process, published through the SHM. */
enum DPDK_HELPER_STATUS {
  DPDK_HELPER_NOT_INITIALIZED = 0, /* value of a freshly zeroed SHM */
  DPDK_HELPER_WAITING_ON_PRIMARY = 1,
  DPDK_HELPER_INITIALIZING_EAL = 2,
  DPDK_HELPER_ALIVE_SENDING_STATS = 3,
  DPDK_HELPER_GRACEFUL_QUIT = 4,
};
95 struct dpdk_config_s {
96 /* General DPDK params */
97 char coremask[DATA_MAX_NAME_LEN];
98 char memory_channels[DATA_MAX_NAME_LEN];
99 char socket_memory[DATA_MAX_NAME_LEN];
100 char process_type[DATA_MAX_NAME_LEN];
101 char file_prefix[DATA_MAX_NAME_LEN];
103 uint32_t eal_initialized;
104 uint32_t enabled_port_mask;
107 int collectd_reinit_shm;
109 sem_t sema_helper_get_stats;
110 sem_t sema_stats_in_shm;
112 enum DPDK_HELPER_STATUS helper_status;
113 enum DPDK_HELPER_ACTION helper_action;
117 cdtime_t port_read_time[RTE_MAX_ETHPORTS];
118 uint32_t num_stats_in_port[RTE_MAX_ETHPORTS];
119 struct rte_eth_link link_status[RTE_MAX_ETHPORTS];
120 struct rte_eth_xstats xstats;
121 /* rte_eth_xstats from here on until the end of the SHM */
123 typedef struct dpdk_config_s dpdk_config_t;
125 static int g_configured = 0;
126 static dpdk_config_t *g_configuration = 0;
128 static int dpdk_config_init_default(void);
129 static int dpdk_config(oconfig_item_t *ci);
130 static int dpdk_helper_init_eal(void);
131 static int dpdk_helper_run(void);
132 static int dpdk_helper_spawn(enum DPDK_HELPER_ACTION action);
133 static int dpdk_init (void);
134 static int dpdk_read(user_data_t *ud);
135 static int dpdk_shm_cleanup(void);
136 static int dpdk_shm_init(size_t size);
137 void module_register(void);
139 /* Write the default configuration to the g_configuration instances */
140 static int dpdk_config_init_default(void)
142 g_configuration->interval = plugin_get_interval();
143 WARNING("dpdkstat: No time interval was configured, default value %lu ms is set\n",
144 CDTIME_T_TO_MS(g_configuration->interval));
145 g_configuration->enabled_port_mask = 0;
146 g_configuration->eal_argc = 2;
147 g_configuration->eal_initialized = 0;
148 ssnprintf(g_configuration->coremask, DATA_MAX_NAME_LEN, "%s", "0xf");
149 ssnprintf(g_configuration->memory_channels, DATA_MAX_NAME_LEN, "%s", "1");
150 ssnprintf(g_configuration->process_type, DATA_MAX_NAME_LEN, "%s", "secondary");
151 ssnprintf(g_configuration->file_prefix, DATA_MAX_NAME_LEN, "%s",
152 "/var/run/.rte_config");
156 static int dpdk_config(oconfig_item_t *ci)
160 /* Initialize a POSIX SHared Memory (SHM) object. */
161 dpdk_shm_init(sizeof(dpdk_config_t));
163 /* Set defaults for config, overwritten by loop if config item exists */
164 ret = dpdk_config_init_default();
169 for (i = 0; i < ci->children_num; i++) {
170 oconfig_item_t *child = ci->children + i;
172 if (strcasecmp("Interval", child->key) == 0) {
173 g_configuration->interval =
174 DOUBLE_TO_CDTIME_T (child->values[0].value.number);
175 DEBUG("dpdkstat: Plugin Read Interval %lu milliseconds\n",
176 CDTIME_T_TO_MS(g_configuration->interval));
177 } else if (strcasecmp("Coremask", child->key) == 0) {
178 ssnprintf(g_configuration->coremask, DATA_MAX_NAME_LEN, "%s",
179 child->values[0].value.string);
180 DEBUG("dpdkstat:COREMASK %s \n", g_configuration->coremask);
181 g_configuration->eal_argc+=1;
182 } else if (strcasecmp("MemoryChannels", child->key) == 0) {
183 ssnprintf(g_configuration->memory_channels, DATA_MAX_NAME_LEN, "%s",
184 child->values[0].value.string);
185 DEBUG("dpdkstat:Memory Channels %s \n", g_configuration->memory_channels);
186 g_configuration->eal_argc+=1;
187 } else if (strcasecmp("SocketMemory", child->key) == 0) {
188 ssnprintf(g_configuration->socket_memory, DATA_MAX_NAME_LEN, "%s",
189 child->values[0].value.string);
190 DEBUG("dpdkstat: socket mem %s \n", g_configuration->socket_memory);
191 g_configuration->eal_argc+=1;
192 } else if (strcasecmp("ProcessType", child->key) == 0) {
193 ssnprintf(g_configuration->process_type, DATA_MAX_NAME_LEN, "%s",
194 child->values[0].value.string);
195 DEBUG("dpdkstat: proc type %s \n", g_configuration->process_type);
196 g_configuration->eal_argc+=1;
197 } else if (strcasecmp("FilePrefix", child->key) == 0) {
198 ssnprintf(g_configuration->file_prefix, DATA_MAX_NAME_LEN, "/var/run/.%s_config",
199 child->values[0].value.string);
200 DEBUG("dpdkstat: file prefix %s \n", g_configuration->file_prefix);
201 if (strcasecmp(g_configuration->file_prefix, "/var/run/.rte_config") != 0) {
202 g_configuration->eal_argc+=1;
205 WARNING ("dpdkstat: The config option \"%s\" is unknown.",
208 } /* End for (i = 0; i < ci->children_num; i++)*/
209 g_configured = 1; /* Bypass configuration in dpdk_shm_init(). */
214 /* Initialize SHared Memory (SHM) for config and helper process */
215 static int dpdk_shm_init(size_t size)
218 * Check if SHM is already configured: when config items are provided, the
219 * config function initializes SHM. If there is no config, then init() will
225 /* Create and open a new object, or open an existing object. */
226 int fd = shm_open(DPDK_SHM_NAME, O_CREAT | O_TRUNC | O_RDWR, 0666);
228 WARNING("dpdkstat:Failed to open %s as SHM:%s\n", DPDK_SHM_NAME,
232 /* Set the size of the shared memory object. */
233 int ret = ftruncate(fd, size);
235 WARNING("dpdkstat:Failed to resize SHM:%s\n", strerror(errno));
238 /* Map the shared memory object into this process' virtual address space. */
239 g_configuration = (dpdk_config_t *)
240 mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
241 if (g_configuration == MAP_FAILED) {
242 WARNING("dpdkstat:Failed to mmap SHM:%s\n", strerror(errno));
246 * Close the file descriptor, the shared memory object still exists
247 * and can only be removed by calling shm_unlink().
251 /* Initialize g_configuration. */
252 memset(g_configuration, 0, size);
254 /* Initialize the semaphores for SHM use */
255 sem_init(&g_configuration->sema_helper_get_stats, 1, 0);
256 sem_init(&g_configuration->sema_stats_in_shm, 1, 0);
262 /* Reset to zero, as it was set to MAP_FAILED aka: (void *)-1. Avoid
263 * an issue if collectd attempts to run this plugin failure.
269 static int dpdk_re_init_shm()
271 dpdk_config_t temp_config;
272 memcpy(&temp_config,g_configuration, sizeof(dpdk_config_t));
273 DEBUG("dpdkstat: %s: ports %d, xstats %d\n", __func__, temp_config.num_ports,
274 temp_config.num_xstats);
276 int shm_xstats_size = sizeof(dpdk_config_t) + (sizeof(struct rte_eth_xstats) *
277 g_configuration->num_xstats);
278 DEBUG("=== SHM new size for %d xstats\n", g_configuration->num_xstats);
280 int err = dpdk_shm_cleanup();
282 ERROR("dpdkstat: Error in shm_cleanup in %s\n", __func__);
284 err = dpdk_shm_init(shm_xstats_size);
286 ERROR("dpdkstat: Error in shm_init in %s\n", __func__);
288 /* If the XML config() function has been run, dont re-initialize defaults */
290 dpdk_config_init_default();
292 memcpy(g_configuration,&temp_config, sizeof(dpdk_config_t));
293 g_configuration->collectd_reinit_shm = 0;
298 static int dpdk_init (void)
301 int err = dpdk_shm_init(sizeof(dpdk_config_t));
303 ERROR("dpdkstat: %s : error %d in shm_init()", __func__, err);
305 /* If the XML config() function has been run, dont re-initialize defaults */
307 ret = dpdk_config_init_default();
313 plugin_register_complex_read (NULL, "dpdkstat", dpdk_read,
314 g_configuration->interval, NULL);
318 static int dpdk_helper_exit(int reset)
320 g_configuration->helper_status = DPDK_HELPER_GRACEFUL_QUIT;
322 g_configuration->eal_initialized = 0;
323 g_configuration->num_ports = 0;
324 memset(&g_configuration->xstats, 0, g_configuration->num_xstats* sizeof(struct rte_eth_xstats));
325 g_configuration->num_xstats = 0;
327 for(;i < RTE_MAX_ETHPORTS; i++)
328 g_configuration->num_stats_in_port[i] = 0;
330 close(g_configuration->helper_pipes[1]);
331 kill(g_configuration->helper_pid, SIGKILL);
335 static int dpdk_helper_spawn(enum DPDK_HELPER_ACTION action)
337 g_configuration->eal_initialized = 0;
338 g_configuration->helper_action = action;
340 * Create a pipe for helper stdout back to collectd. This is necessary for
341 * logging EAL failures, as rte_eal_init() calls rte_panic().
343 if(g_configuration->helper_pipes[1]) {
344 DEBUG("dpdkstat: collectd closing helper pipe %d\n",
345 g_configuration->helper_pipes[1]);
347 DEBUG("dpdkstat: collectd helper pipe %d, not closing\n",
348 g_configuration->helper_pipes[1]);
350 if(pipe(g_configuration->helper_pipes) != 0) {
351 DEBUG("dpdkstat: Could not create helper pipe: %s\n", strerror(errno));
355 int pipe0_flags = fcntl(g_configuration->helper_pipes[1], F_GETFL, 0);
356 int pipe1_flags = fcntl(g_configuration->helper_pipes[0], F_GETFL, 0);
357 fcntl(g_configuration->helper_pipes[1], F_SETFL, pipe1_flags | O_NONBLOCK);
358 fcntl(g_configuration->helper_pipes[0], F_SETFL, pipe0_flags | O_NONBLOCK);
362 close(g_configuration->helper_pipes[1]);
363 g_configuration->helper_pid = pid;
364 DEBUG("dpdkstat: helper pid %u\n", g_configuration->helper_pid);
365 /* Kick helper once its alive to have it start processing */
366 sem_post(&g_configuration->sema_helper_get_stats);
367 } else if (pid == 0) {
368 /* Replace stdout with a pipe to collectd. */
369 close(g_configuration->helper_pipes[0]);
370 close(STDOUT_FILENO);
371 dup2(g_configuration->helper_pipes[1], STDOUT_FILENO);
374 ERROR("dpdkstat: Failed to fork helper process: %s\n", strerror(errno));
381 * Initialize the DPDK EAL, if this returns, EAL is successfully initialized.
382 * On failure, the EAL prints an error message, and the helper process exits.
384 static int dpdk_helper_init_eal(void)
386 g_configuration->helper_status = DPDK_HELPER_INITIALIZING_EAL;
387 char *argp[(g_configuration->eal_argc) + 1];
390 argp[i++] = "collectd-dpdk";
391 if(strcasecmp(g_configuration->coremask, "") != 0) {
393 argp[i++] = g_configuration->coremask;
395 if(strcasecmp(g_configuration->memory_channels, "") != 0) {
397 argp[i++] = g_configuration->memory_channels;
399 if(strcasecmp(g_configuration->socket_memory, "") != 0) {
400 argp[i++] = "--socket-mem";
401 argp[i++] = g_configuration->socket_memory;
403 if(strcasecmp(g_configuration->file_prefix, "") != 0 &&
404 strcasecmp(g_configuration->file_prefix, "/var/run/.rte_config") != 0) {
405 argp[i++] = "--file-prefix";
406 argp[i++] = g_configuration->file_prefix;
408 if(strcasecmp(g_configuration->process_type, "") != 0) {
409 argp[i++] = "--proc-type";
410 argp[i++] = g_configuration->process_type;
412 g_configuration->eal_argc = i;
414 g_configuration->eal_initialized = 1;
415 int ret = rte_eal_init(g_configuration->eal_argc, argp);
417 g_configuration->eal_initialized = 0;
418 printf("dpdkstat: ERROR initializing EAL ret = %d\n", ret);
419 printf("dpdkstat: EAL arguments: ");
420 for (i=0; i< g_configuration->eal_argc; i++) {
421 printf("%s ", argp[i]);
429 static int dpdk_helper_run (void)
431 pid_t ppid = getppid();
432 g_configuration->helper_status = DPDK_HELPER_WAITING_ON_PRIMARY;
435 /* sem_timedwait() to avoid blocking forever */
437 cdtime_t now = cdtime();
438 cdtime_t half_sec = MS_TO_CDTIME_T(1500);
439 CDTIME_T_TO_TIMESPEC(now + half_sec + g_configuration->interval *2, &ts);
440 int ret = sem_timedwait(&g_configuration->sema_helper_get_stats, &ts);
442 if(ret == -1 && errno == ETIMEDOUT) {
443 ERROR("dpdkstat-helper: sem timedwait()"
444 " timeout, did collectd terminate?\n");
445 dpdk_helper_exit(RESET);
448 /* Parent PID change means collectd died so quit the helper process. */
449 if (ppid != getppid()) {
450 WARNING("dpdkstat-helper: parent PID changed, quitting.\n");
451 dpdk_helper_exit(RESET);
454 /* Checking for DPDK primary process. */
455 if (!rte_eal_primary_proc_alive(g_configuration->file_prefix)) {
456 if(g_configuration->eal_initialized) {
457 WARNING("dpdkstat-helper: no primary alive but EAL initialized:"
459 dpdk_helper_exit(RESET);
461 g_configuration->helper_status = DPDK_HELPER_WAITING_ON_PRIMARY;
462 /* Back to start of while() - waiting for primary process */
466 if(!g_configuration->eal_initialized) {
467 /* Initialize EAL. */
468 int ret = dpdk_helper_init_eal();
470 dpdk_helper_exit(RESET);
473 g_configuration->helper_status = DPDK_HELPER_ALIVE_SENDING_STATS;
476 nb_ports = rte_eth_dev_count();
478 DEBUG("dpdkstat-helper: No DPDK ports available. "
479 "Check bound devices to DPDK driver.\n");
483 if (nb_ports > RTE_MAX_ETHPORTS)
484 nb_ports = RTE_MAX_ETHPORTS;
485 /* If no port mask was specified enable all ports*/
486 if (g_configuration->enabled_port_mask == 0)
487 g_configuration->enabled_port_mask = 0xffff;
489 int i, len = 0, enabled_port_count = 0, num_xstats = 0;
490 for (i = 0; i < nb_ports; i++) {
491 if (g_configuration->enabled_port_mask & (1 << i)) {
492 if(g_configuration->helper_action == DPDK_HELPER_ACTION_COUNT_STATS) {
493 len = rte_eth_xstats_get(i, NULL, 0);
495 ERROR("dpdkstat-helper: Cannot get xstats count\n");
499 g_configuration->num_stats_in_port[enabled_port_count] = len;
500 enabled_port_count++;
503 len = g_configuration->num_stats_in_port[enabled_port_count];
504 g_configuration->port_read_time[enabled_port_count] = cdtime();
505 ret = rte_eth_xstats_get(i, &g_configuration->xstats + num_xstats,
506 g_configuration->num_stats_in_port[i]);
507 if (ret < 0 || ret != len) {
508 DEBUG("dpdkstat-helper: Error reading xstats on port %d len = %d\n",
512 num_xstats += g_configuration->num_stats_in_port[enabled_port_count];
513 enabled_port_count++;
515 } /* if (enabled_port_mask) */
516 } /* for (nb_ports) */
518 if(g_configuration->helper_action == DPDK_HELPER_ACTION_COUNT_STATS) {
519 g_configuration->num_ports = enabled_port_count;
520 g_configuration->num_xstats = num_xstats;
521 DEBUG("dpdkstat-helper ports: %d, num stats: %d\n",
522 g_configuration->num_ports, g_configuration->num_xstats);
523 /* Exit, allowing collectd to re-init SHM to the right size */
524 g_configuration->collectd_reinit_shm = REINIT_SHM;
525 dpdk_helper_exit(NO_RESET);
527 /* Now kick collectd send thread to send the stats */
528 sem_post(&g_configuration->sema_stats_in_shm);
534 static int dpdk_read (user_data_t *ud)
539 * Check if SHM flag is set to be re-initialized. AKA DPDK ports have been
540 * counted, so re-init SHM to be large enough to fit all the statistics.
542 if(g_configuration->collectd_reinit_shm) {
543 DEBUG("dpdkstat: read() now reinit SHM then launching send-thread\n");
548 * Check if DPDK proc is alive, and has already counted port / stats. This
549 * must be done in dpdk_read(), because the DPDK primary process may not be
550 * alive at dpdk_init() time.
552 if(g_configuration->helper_status == DPDK_HELPER_NOT_INITIALIZED ||
553 g_configuration->helper_status == DPDK_HELPER_GRACEFUL_QUIT) {
554 int action = DPDK_HELPER_ACTION_SEND_STATS;
555 if(g_configuration->num_xstats == 0)
556 action = DPDK_HELPER_ACTION_COUNT_STATS;
557 /* Spawn the helper thread to count stats or to read stats. */
558 dpdk_helper_spawn(action);
562 pid_t ws = waitpid(g_configuration->helper_pid, &exit_status, WNOHANG);
564 * Conditions under which to respawn helper:
565 * waitpid() fails, helper process died (or quit), so respawn
567 int respawn_helper = 0;
572 char buf[DPDKSTAT_MAX_BUFFER_SIZE];
573 char out[DPDKSTAT_MAX_BUFFER_SIZE];
575 /* non blocking check on helper logging pipe */
577 fds.fd = g_configuration->helper_pipes[0];
579 int data_avail = poll(&fds, 1, 0);
581 int nbytes = read(g_configuration->helper_pipes[0], buf, sizeof(buf));
584 ssnprintf( out, nbytes, "%s", buf);
585 DEBUG("dpdkstat: helper-proc: %s\n", out);
589 if (g_configuration->helper_pid)
590 dpdk_helper_exit(RESET);
591 dpdk_helper_spawn(DPDK_HELPER_ACTION_COUNT_STATS);
594 struct timespec helper_kick_time;
595 clock_gettime(CLOCK_REALTIME, &helper_kick_time);
596 /* Kick helper process through SHM */
597 sem_post(&g_configuration->sema_helper_get_stats);
600 cdtime_t now = cdtime();
601 CDTIME_T_TO_TIMESPEC(now + g_configuration->interval, &ts);
602 ret = sem_timedwait(&g_configuration->sema_stats_in_shm, &ts);
603 if(ret == -1 && errno == ETIMEDOUT) {
604 DEBUG("dpdkstat: timeout in collectd thread: is a DPDK Primary running? \n");
608 /* Dispatch the stats.*/
611 for (i = 0; i < g_configuration->num_ports; i++) {
612 cdtime_t time = g_configuration->port_read_time[i];
614 int len = g_configuration->num_stats_in_port[i];
615 ssnprintf(dev_name, sizeof(dev_name), "port.%d", i);
616 struct rte_eth_xstats *xstats = (&g_configuration->xstats);
617 xstats += count; /* pointer arithmetic to jump to each stats struct */
618 for (j = 0; j < len; j++) {
619 value_t dpdkstat_values[1];
620 value_list_t dpdkstat_vl = VALUE_LIST_INIT;
622 dpdkstat_values[0].counter = xstats[j].value;
623 dpdkstat_vl.values = dpdkstat_values;
624 dpdkstat_vl.values_len = 1; /* Submit stats one at a time */
625 dpdkstat_vl.time = time;
626 sstrncpy (dpdkstat_vl.host, hostname_g, sizeof (dpdkstat_vl.host));
627 sstrncpy (dpdkstat_vl.plugin, "dpdkstat", sizeof (dpdkstat_vl.plugin));
628 sstrncpy (dpdkstat_vl.plugin_instance, dev_name,
629 sizeof (dpdkstat_vl.plugin_instance));
630 sstrncpy (dpdkstat_vl.type, "counter",
631 sizeof (dpdkstat_vl.type));
632 sstrncpy (dpdkstat_vl.type_instance, xstats[j].name,
633 sizeof (dpdkstat_vl.type_instance));
634 plugin_dispatch_values (&dpdkstat_vl);
637 } /* for each port */
641 static int dpdk_shm_cleanup(void)
643 int ret = munmap(g_configuration, sizeof(dpdk_config_t));
646 WARNING("dpdkstat: munmap returned %d\n", ret);
649 ret = shm_unlink(DPDK_SHM_NAME);
651 WARNING("dpdkstat: shm_unlink returned %d\n", ret);
657 static int dpdk_shutdown (void)
659 close(g_configuration->helper_pipes[1]);
660 kill(g_configuration->helper_pid, SIGKILL);
661 int ret = dpdk_shm_cleanup();
666 void module_register (void)
668 plugin_register_complex_config ("dpdkstat", dpdk_config);
669 plugin_register_init ("dpdkstat", dpdk_init);
670 plugin_register_shutdown ("dpdkstat", dpdk_shutdown);