1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
18 # define random() rand()
19 # define srandom(x) srand(x)
22 #define _LK_UNLCK 0 /* Unlock */
23 #define _LK_LOCK 1 /* Lock */
24 #define _LK_NBLCK 2 /* Non-blocking lock */
25 #define _LK_RLCK 3 /* Lock for read only */
26 #define _LK_NBRLCK 4 /* Non-blocking lock for read only */
29 #define LK_UNLCK _LK_UNLCK
30 #define LK_LOCK _LK_LOCK
31 #define LK_NBLCK _LK_NBLCK
32 #define LK_RLCK _LK_RLCK
33 #define LK_NBRLCK _LK_NBRLCK
36 /* DEBUG 2 prints information obtained via mincore(2) */
38 /* do not calculate exact madvise hints but assume 1 page for headers and
39 * set DONTNEED for the rest, which is assumed to be data */
40 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
41 * your syscalls are very slow */
44 /* the cast to void* is there to avoid this warning seen on ia64 with certain
45 versions of gcc: 'cast increases required alignment of target type'
47 #define __rrd_read(dst, dst_t, cnt) { \
48 size_t wanted = sizeof(dst_t)*(cnt); \
49 if (offset + wanted > rrd_file->file_len) { \
50 rrd_set_error("reached EOF while loading header " #dst); \
51 goto out_nullify_head; \
53 (dst) = (dst_t*)(void*) (data + offset); \
57 #define __rrd_read(dst, dst_t, cnt) { \
58 size_t wanted = sizeof(dst_t)*(cnt); \
60 if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
61 rrd_set_error(#dst " malloc"); \
62 goto out_nullify_head; \
64 got = read (rrd_simple_file->fd, dst, wanted); \
65 if (got != wanted) { \
66 rrd_set_error("short read while reading header " #dst); \
67 goto out_nullify_head; \
73 /* get the address of the start of this page */
74 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
76 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
80 long int rra_random_row(
84 /* Open a database file, return its header and an open filehandle,
85 * positioned to the first cdp in the first rra.
86 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
87 * before returning an error. Do not call rrd_close upon failure of rrd_open.
88 * If creating a new file, the parameter rrd must be initialised with
89 * details of the file content.
90 * If opening an existing file, then rrd must be initialised by
91 * rrd_init(rrd) prior to invoking rrd_open
95 const char *const file_name,
104 ssize_t _page_size = sysconf(_SC_PAGESIZE);
105 char *data = MAP_FAILED;
109 rrd_file_t *rrd_file = NULL;
110 rrd_simple_file_t *rrd_simple_file = NULL;
111 size_t newfile_size = 0;
112 size_t header_len, value_cnt, data_len;
114 /* Are we creating a new file? */
115 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
118 sizeof(stat_head_t) + \
119 sizeof(ds_def_t) * rrd->stat_head->ds_cnt + \
120 sizeof(rra_def_t) * rrd->stat_head->rra_cnt + \
122 sizeof(live_head_t) + \
123 sizeof(pdp_prep_t) * rrd->stat_head->ds_cnt + \
124 sizeof(cdp_prep_t) * rrd->stat_head->ds_cnt * rrd->stat_head->rra_cnt + \
125 sizeof(rra_ptr_t) * rrd->stat_head->rra_cnt;
128 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
129 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
131 data_len = sizeof(rrd_value_t) * value_cnt;
133 newfile_size = header_len + data_len;
136 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
137 if (rrd_file == NULL) {
138 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
141 memset(rrd_file, 0, sizeof(rrd_file_t));
143 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
144 if(rrd_file->pvt == NULL) {
145 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
148 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
149 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
152 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
153 (RRD_READONLY | RRD_READWRITE)) {
154 /* Both READONLY and READWRITE were given, which is invalid. */
155 rrd_set_error("in read/write request mask");
161 rrd_simple_file->mm_prot = PROT_READ;
162 rrd_simple_file->mm_flags = 0;
165 if (rdwr & RRD_READONLY) {
168 rrd_simple_file->mm_flags = MAP_PRIVATE;
169 # ifdef MAP_NORESERVE
170 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
174 if (rdwr & RRD_READWRITE) {
177 rrd_simple_file->mm_flags = MAP_SHARED;
178 rrd_simple_file->mm_prot |= PROT_WRITE;
181 if (rdwr & RRD_CREAT) {
182 flags |= (O_CREAT | O_TRUNC);
185 if (rdwr & RRD_READAHEAD) {
187 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
189 #if defined MAP_NONBLOCK
190 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
193 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
197 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
198 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
202 /* Better try to avoid seeks as much as possible. stat may be heavy but
203 * many concurrent seeks are even worse. */
204 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
205 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
208 if (newfile_size == 0) {
209 rrd_file->file_len = statb.st_size;
211 rrd_file->file_len = newfile_size;
212 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
213 write(rrd_simple_file->fd, "\0", 1); /* poke */
214 lseek(rrd_simple_file->fd, 0, SEEK_SET);
216 #ifdef HAVE_POSIX_FADVISE
217 /* In general we need no read-ahead when dealing with rrd_files.
218 When we stop reading, it is highly unlikely that we start up again.
219 In this manner we actually save time and disk access (and buffer cache).
220 Thanks to Dave Plonka for the idea of using POSIX_FADV_RANDOM here. */
221 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
225 if (rdwr & RRD_READWRITE)
227 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
228 rrd_set_error("failed to disable the stream buffer\n");
235 data = mmap(0, rrd_file->file_len,
236 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
237 rrd_simple_file->fd, offset);
239 /* let's see if the mmap worked */
240 if (data == MAP_FAILED) {
241 rrd_set_error("mmaping file '%s': %s", file_name,
242 rrd_strerror(errno));
245 rrd_simple_file->file_start = data;
246 if (rdwr & RRD_CREAT) {
247 memset(data, DNAN, newfile_size - 1);
251 if (rdwr & RRD_CREAT)
254 if (rdwr & RRD_COPY) {
255 /* We will read everything in a moment (copying) */
256 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
258 /* We do not need to read anything in for the moment */
259 madvise(data, rrd_file->file_len, MADV_RANDOM);
260 /* the stat_head will be needed soonish, so hint accordingly */
261 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
265 __rrd_read(rrd->stat_head, stat_head_t,
268 /* let's run some tests to see if we are on track ... */
269 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
270 rrd_set_error("'%s' is not an RRD file", file_name);
271 goto out_nullify_head;
274 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
275 rrd_set_error("This RRD was created on another architecture");
276 goto out_nullify_head;
279 version = atoi(rrd->stat_head->version);
281 if (version > atoi(RRD_VERSION)) {
282 rrd_set_error("can't handle RRD file version %s",
283 rrd->stat_head->version);
284 goto out_nullify_head;
286 #if defined USE_MADVISE
287 /* the ds_def will be needed soonish, so hint accordingly */
288 madvise(data + PAGE_START(offset),
289 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
291 __rrd_read(rrd->ds_def, ds_def_t,
292 rrd->stat_head->ds_cnt);
294 #if defined USE_MADVISE
295 /* the rra_def will be needed soonish, so hint accordingly */
296 madvise(data + PAGE_START(offset),
297 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
299 __rrd_read(rrd->rra_def, rra_def_t,
300 rrd->stat_head->rra_cnt);
302 /* handle different format for the live_head */
304 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
305 if (rrd->live_head == NULL) {
306 rrd_set_error("live_head_t malloc");
309 #if defined USE_MADVISE
310 /* the live_head will be needed soonish, so hint accordingly */
311 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
313 __rrd_read(rrd->legacy_last_up, time_t,
316 rrd->live_head->last_up = *rrd->legacy_last_up;
317 rrd->live_head->last_up_usec = 0;
319 #if defined USE_MADVISE
320 /* the live_head will be needed soonish, so hint accordingly */
321 madvise(data + PAGE_START(offset),
322 sizeof(live_head_t), MADV_WILLNEED);
324 __rrd_read(rrd->live_head, live_head_t,
327 __rrd_read(rrd->pdp_prep, pdp_prep_t,
328 rrd->stat_head->ds_cnt);
329 __rrd_read(rrd->cdp_prep, cdp_prep_t,
330 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
331 __rrd_read(rrd->rra_ptr, rra_ptr_t,
332 rrd->stat_head->rra_cnt);
334 rrd_file->header_len = offset;
335 rrd_file->pos = offset;
338 unsigned long row_cnt = 0;
340 for (ui=0; ui<rrd->stat_head->rra_cnt; ui++)
341 row_cnt += rrd->rra_def[ui].row_cnt;
343 size_t correct_len = rrd_file->header_len +
344 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
346 if (correct_len > rrd_file->file_len)
348 rrd_set_error("'%s' is too small (should be %ld bytes)",
349 file_name, (long long) correct_len);
350 goto out_nullify_head;
357 rrd->stat_head = NULL;
360 if (data != MAP_FAILED)
361 munmap(data, rrd_file->file_len);
363 close(rrd_simple_file->fd);
371 #if defined DEBUG && DEBUG > 1
372 /* Print list of in-core pages of the current rrd_file. */
375 rrd_file_t *rrd_file,
378 rrd_simple_file_t *rrd_simple_file;
379 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
381 /* pretty print blocks in core */
384 ssize_t _page_size = sysconf(_SC_PAGESIZE);
386 off = rrd_file->file_len +
387 ((rrd_file->file_len + _page_size - 1) / _page_size);
391 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
393 unsigned is_in = 0, was_in = 0;
395 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
396 is_in = vec[off] & 1; /* if lsb set then is core resident */
399 if (was_in != is_in) {
400 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
401 was_in ? "" : "not ", vec + prev, off - prev);
407 "%s: %sin core: %p len %ld\n", mark,
408 was_in ? "" : "not ", vec + prev, off - prev);
410 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
413 fprintf(stderr, "sorry mincore only works with mmap");
416 #endif /* defined DEBUG && DEBUG > 1 */
419 * get exclusive lock to whole file.
420 * lock gets removed when we close the file
422 * returns 0 on success
425 rrd_file_t *rrd_file)
428 rrd_simple_file_t *rrd_simple_file;
429 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
432 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
435 if (_fstat(rrd_simple_file->fd, &st) == 0) {
436 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
443 lock.l_type = F_WRLCK; /* exclusive write lock */
444 lock.l_len = 0; /* whole file */
445 lock.l_start = 0; /* start of file */
446 lock.l_whence = SEEK_SET; /* l_start is measured from the beginning of the file */
448 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
456 /* drop cache except for the header and the active pages */
458 rrd_file_t *rrd_file,
461 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
462 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
463 size_t dontneed_start;
467 ssize_t _page_size = sysconf(_SC_PAGESIZE);
469 if (rrd_file == NULL) {
470 #if defined DEBUG && DEBUG
471 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
476 #if defined DEBUG && DEBUG > 1
477 mincore_print(rrd_file, "before");
480 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
481 rra_start = rrd_file->header_len;
482 dontneed_start = PAGE_START(rra_start) + _page_size;
483 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
486 + rrd->rra_ptr[i].cur_row
487 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
488 if (active_block > dontneed_start) {
490 madvise(rrd_simple_file->file_start + dontneed_start,
491 active_block - dontneed_start - 1, MADV_DONTNEED);
493 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
494 #ifdef HAVE_POSIX_FADVISE
495 posix_fadvise(rrd_simple_file->fd, dontneed_start,
496 active_block - dontneed_start - 1,
497 POSIX_FADV_DONTNEED);
500 dontneed_start = active_block;
501 /* do not release 'hot' block if update for this RRA will occur
502 * within 10 minutes */
503 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
504 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
505 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
506 dontneed_start += _page_size;
509 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
513 if (dontneed_start < rrd_file->file_len) {
515 madvise(rrd_simple_file->file_start + dontneed_start,
516 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
518 #ifdef HAVE_POSIX_FADVISE
519 posix_fadvise(rrd_simple_file->fd, dontneed_start,
520 rrd_file->file_len - dontneed_start,
521 POSIX_FADV_DONTNEED);
525 #if defined DEBUG && DEBUG > 1
526 mincore_print(rrd_file, "after");
528 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
536 rrd_file_t *rrd_file)
538 rrd_simple_file_t *rrd_simple_file;
539 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
543 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
545 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
546 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
548 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
550 ret = close(rrd_simple_file->fd);
552 rrd_set_error("closing file: %s", rrd_strerror(errno));
560 /* Set position of rrd_file. */
563 rrd_file_t *rrd_file,
568 rrd_simple_file_t *rrd_simple_file;
569 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
572 if (whence == SEEK_SET)
574 else if (whence == SEEK_CUR)
575 rrd_file->pos += off;
576 else if (whence == SEEK_END)
577 rrd_file->pos = rrd_file->file_len + off;
579 ret = lseek(rrd_simple_file->fd, off, whence);
581 rrd_set_error("lseek: %s", rrd_strerror(errno));
584 /* mimic fseek, which returns 0 upon success */
585 return ret < 0; /*XXX: or just ret to mimic lseek */
589 /* Get current position in rrd_file. */
592 rrd_file_t *rrd_file)
594 return rrd_file->pos;
598 /* Read count bytes into buffer buf, starting at rrd_file->pos.
599 * Returns the number of bytes read or <0 on error. */
602 rrd_file_t *rrd_file,
606 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
611 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
614 return -1; /* EINVAL */
615 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
616 if (_surplus > 0) { /* short read */
621 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
623 rrd_file->pos += _cnt; /* mimic read() semantics */
628 ret = read(rrd_simple_file->fd, buf, count);
630 rrd_file->pos += ret; /* mimic read() semantics */
636 /* Write count bytes from buffer buf to the current position
637 * rrd_file->pos of rrd_simple_file->fd.
638 * Returns the number of bytes written or <0 on error. */
641 rrd_file_t *rrd_file,
645 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
647 size_t old_size = rrd_file->file_len;
651 return -1; /* EINVAL */
653 if((rrd_file->pos + count) > old_size)
655 rrd_set_error("attempting to write beyond end of file");
658 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
659 rrd_file->pos += count;
660 return count; /* mimic write() semantics */
662 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
665 rrd_file->pos += _sz;
671 /* flush all data pending to be written to FD. */
674 rrd_file_t *rrd_file)
677 rrd_simple_file_t *rrd_simple_file;
678 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
679 if (fdatasync(rrd_simple_file->fd) != 0) {
680 rrd_set_error("flushing fd %d: %s", rrd_simple_file->fd,
681 rrd_strerror(errno));
687 /* Initialize RRD header. */
692 rrd->stat_head = NULL;
695 rrd->live_head = NULL;
696 rrd->legacy_last_up = NULL;
698 rrd->pdp_prep = NULL;
699 rrd->cdp_prep = NULL;
700 rrd->rrd_value = NULL;
704 /* free RRD header data. */
710 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
711 free(rrd->live_head);
718 free(rrd->live_head);
719 free(rrd->stat_head);
725 free(rrd->rrd_value);
730 /* routine used by external libraries to free memory allocated by
740 * rra_update informs us about the RRAs being updated
741 * The low level storage API may use this information for
742 * aligning RRAs within stripes, or other performance enhancements
745 rrd_file_t *rrd_file __attribute__((unused)),
746 int rra_idx __attribute__((unused)),
747 unsigned long rra_row __attribute__((unused)),
748 time_t rra_time __attribute__((unused)))
753 * This function is called when creating a new RRD
754 * The storage implementation can use this opportunity to select
755 * a sensible starting row within the file.
756 * The default implementation is random, to ensure that all RRAs
757 * don't change to a new disk block at the same time
759 unsigned long rrd_select_initial_row(
760 rrd_file_t *rrd_file __attribute__((unused)),
761 int rra_idx __attribute__((unused)),
765 return rra_random_row(rra);
768 static int rand_init = 0;
770 long int rra_random_row(
774 srandom((unsigned int) time(NULL) + (unsigned int) getpid());
778 return random() % rra->row_cnt;