1 /*****************************************************************************
2 * RRDtool 1.4.2 Copyright by Tobi Oetiker, 1997-2009
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
15 #ifdef HAVE_BROKEN_MS_ASYNC
16 #include <sys/types.h>
25 #define _LK_UNLCK 0 /* Unlock */
26 #define _LK_LOCK 1 /* Lock */
27 #define _LK_NBLCK 2 /* Non-blocking lock */
28 #define _LK_RLCK 3 /* Lock for read only */
29 #define _LK_NBRLCK 4 /* Non-blocking lock for read only */
32 #define LK_UNLCK _LK_UNLCK
33 #define LK_LOCK _LK_LOCK
34 #define LK_NBLCK _LK_NBLCK
35 #define LK_RLCK _LK_RLCK
36 #define LK_NBRLCK _LK_NBRLCK
39 /* DEBUG 2 prints information obtained via mincore(2) */
41 /* do not calculate exact madvise hints but assume 1 page for headers and
42 * set DONTNEED for the rest, which is assumed to be data */
43 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
44 * your syscalls are very slow */
47 /* the cast to void* is there to avoid this warning seen on ia64 with certain
48 versions of gcc: 'cast increases required alignment of target type'
50 #define __rrd_read(dst, dst_t, cnt) { \
51 size_t wanted = sizeof(dst_t)*(cnt); \
52 if (offset + wanted > rrd_file->file_len) { \
53 rrd_set_error("reached EOF while loading header " #dst); \
54 goto out_nullify_head; \
56 (dst) = (dst_t*)(void*) (data + offset); \
60 #define __rrd_read(dst, dst_t, cnt) { \
61 size_t wanted = sizeof(dst_t)*(cnt); \
63 if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
64 rrd_set_error(#dst " malloc"); \
65 goto out_nullify_head; \
67 got = read (rrd_simple_file->fd, dst, wanted); \
68 if (got != wanted) { \
69 rrd_set_error("short read while reading header " #dst); \
70 goto out_nullify_head; \
76 /* get the address of the start of this page */
77 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
79 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
83 /* Open a database file, return its header and an open filehandle,
84 * positioned to the first cdp in the first rra.
85 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
86 * before returning an error. Do not call rrd_close upon failure of rrd_open.
87 * If creating a new file, the parameter rrd must be initialised with
88 * details of the file content.
89 * If opening an existing file, then rrd must be initialised by
90 * rrd_init(rrd) prior to invoking rrd_open
94 const char *const file_name,
103 ssize_t _page_size = sysconf(_SC_PAGESIZE);
104 char *data = MAP_FAILED;
108 rrd_file_t *rrd_file = NULL;
109 rrd_simple_file_t *rrd_simple_file = NULL;
110 size_t newfile_size = 0;
111 size_t header_len, value_cnt, data_len;
113 /* Are we creating a new file? */
114 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
116 header_len = rrd_get_header_size(rrd);
119 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
120 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
122 data_len = sizeof(rrd_value_t) * value_cnt;
124 newfile_size = header_len + data_len;
127 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
128 if (rrd_file == NULL) {
129 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
132 memset(rrd_file, 0, sizeof(rrd_file_t));
134 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
135 if(rrd_file->pvt == NULL) {
136 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
139 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
140 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
143 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
144 (RRD_READONLY | RRD_READWRITE)) {
145 /* Both READONLY and READWRITE were given, which is invalid. */
146 rrd_set_error("in read/write request mask");
152 rrd_simple_file->mm_prot = PROT_READ;
153 rrd_simple_file->mm_flags = 0;
156 if (rdwr & RRD_READONLY) {
159 rrd_simple_file->mm_flags = MAP_PRIVATE;
160 # ifdef MAP_NORESERVE
161 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
165 if (rdwr & RRD_READWRITE) {
168 rrd_simple_file->mm_flags = MAP_SHARED;
169 rrd_simple_file->mm_prot |= PROT_WRITE;
172 if (rdwr & RRD_CREAT) {
173 flags |= (O_CREAT | O_TRUNC);
176 if (rdwr & RRD_READAHEAD) {
178 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
180 #if defined MAP_NONBLOCK
181 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
184 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
188 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
189 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
194 #ifdef HAVE_BROKEN_MS_ASYNC
195 if (rdwr & RRD_READWRITE) {
196 /* on some unices, the file's mtime does not get updated
197 on msync MS_ASYNC; in order to help them,
198 we update the timestamp at this point.
199 The thing happens pretty 'close' to the open
200 call so the chances of a race should be minimal.
202 Maybe ask your vendor to fix your OS ... */
203 utime(file_name,NULL);
208 /* Better try to avoid seeks as much as possible. stat may be heavy but
209 * many concurrent seeks are even worse. */
210 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
211 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
214 if (newfile_size == 0) {
215 rrd_file->file_len = statb.st_size;
217 rrd_file->file_len = newfile_size;
218 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
219 if ( write(rrd_simple_file->fd, "\0", 1) == -1){ /* poke */
220 rrd_set_error("write '%s': %s", file_name, rrd_strerror(errno));
223 lseek(rrd_simple_file->fd, 0, SEEK_SET);
225 #ifdef HAVE_POSIX_FADVISE
226 /* In general we need no read-ahead when dealing with rrd_files.
227 When we stop reading, it is highly unlikely that we start up again.
228 In this manner we actually save time and diskaccess (and buffer cache).
229 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
230 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
234 if (rdwr & RRD_READWRITE)
236 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
237 rrd_set_error("failed to disable the stream buffer\n");
244 data = mmap(0, rrd_file->file_len,
245 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
246 rrd_simple_file->fd, offset);
248 /* lets see if the first read worked */
249 if (data == MAP_FAILED) {
250 rrd_set_error("mmaping file '%s': %s", file_name,
251 rrd_strerror(errno));
254 rrd_simple_file->file_start = data;
255 if (rdwr & RRD_CREAT) {
256 memset(data, DNAN, newfile_size - 1);
260 if (rdwr & RRD_CREAT)
263 if (rdwr & RRD_COPY) {
264 /* We will read everything in a moment (copying) */
265 madvise(data, rrd_file->file_len, MADV_WILLNEED );
266 madvise(data, rrd_file->file_len, MADV_SEQUENTIAL );
268 /* We do not need to read anything in for the moment */
269 madvise(data, rrd_file->file_len, MADV_RANDOM);
270 /* the stat_head will be needed soonish, so hint accordingly */
271 madvise(data, sizeof(stat_head_t), MADV_WILLNEED);
272 madvise(data, sizeof(stat_head_t), MADV_RANDOM);
276 __rrd_read(rrd->stat_head, stat_head_t,
279 /* lets do some test if we are on track ... */
280 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
281 rrd_set_error("'%s' is not an RRD file", file_name);
282 goto out_nullify_head;
285 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
286 rrd_set_error("This RRD was created on another architecture");
287 goto out_nullify_head;
290 version = atoi(rrd->stat_head->version);
292 if (version > atoi(RRD_VERSION)) {
293 rrd_set_error("can't handle RRD file version %s",
294 rrd->stat_head->version);
295 goto out_nullify_head;
297 #if defined USE_MADVISE
298 /* the ds_def will be needed soonish, so hint accordingly */
299 madvise(data + PAGE_START(offset),
300 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
302 __rrd_read(rrd->ds_def, ds_def_t,
303 rrd->stat_head->ds_cnt);
305 #if defined USE_MADVISE
306 /* the rra_def will be needed soonish, so hint accordingly */
307 madvise(data + PAGE_START(offset),
308 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
310 __rrd_read(rrd->rra_def, rra_def_t,
311 rrd->stat_head->rra_cnt);
313 /* handle different format for the live_head */
315 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
316 if (rrd->live_head == NULL) {
317 rrd_set_error("live_head_t malloc");
320 #if defined USE_MADVISE
321 /* the live_head will be needed soonish, so hint accordingly */
322 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
324 __rrd_read(rrd->legacy_last_up, time_t,
327 rrd->live_head->last_up = *rrd->legacy_last_up;
328 rrd->live_head->last_up_usec = 0;
330 #if defined USE_MADVISE
331 /* the live_head will be needed soonish, so hint accordingly */
332 madvise(data + PAGE_START(offset),
333 sizeof(live_head_t), MADV_WILLNEED);
335 __rrd_read(rrd->live_head, live_head_t,
338 __rrd_read(rrd->pdp_prep, pdp_prep_t,
339 rrd->stat_head->ds_cnt);
340 __rrd_read(rrd->cdp_prep, cdp_prep_t,
341 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
342 __rrd_read(rrd->rra_ptr, rra_ptr_t,
343 rrd->stat_head->rra_cnt);
345 rrd_file->header_len = offset;
346 rrd_file->pos = offset;
349 unsigned long row_cnt = 0;
351 for (ui=0; ui<rrd->stat_head->rra_cnt; ui++)
352 row_cnt += rrd->rra_def[ui].row_cnt;
354 size_t correct_len = rrd_file->header_len +
355 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
357 if (correct_len > rrd_file->file_len)
359 rrd_set_error("'%s' is too small (should be %ld bytes)",
360 file_name, (long long) correct_len);
361 goto out_nullify_head;
368 rrd->stat_head = NULL;
371 if (data != MAP_FAILED)
372 munmap(data, rrd_file->file_len);
375 close(rrd_simple_file->fd);
383 #if defined DEBUG && DEBUG > 1
384 /* Print list of in-core pages of the current rrd_file. */
387 rrd_file_t *rrd_file,
390 rrd_simple_file_t *rrd_simple_file;
391 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
393 /* pretty print blocks in core */
396 ssize_t _page_size = sysconf(_SC_PAGESIZE);
398 off = rrd_file->file_len +
399 ((rrd_file->file_len + _page_size - 1) / _page_size);
403 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
405 unsigned is_in = 0, was_in = 0;
407 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
408 is_in = vec[off] & 1; /* if lsb set then is core resident */
411 if (was_in != is_in) {
412 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
413 was_in ? "" : "not ", vec + prev, off - prev);
419 "%s: %sin core: %p len %ld\n", mark,
420 was_in ? "" : "not ", vec + prev, off - prev);
422 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
425 fprintf(stderr, "sorry mincore only works with mmap");
428 #endif /* defined DEBUG && DEBUG > 1 */
431 * get exclusive lock to whole file.
432 * lock gets removed when we close the file
434 * returns 0 on success
437 rrd_file_t *rrd_file)
440 rrd_simple_file_t *rrd_simple_file;
441 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
444 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
447 if (_fstat(rrd_simple_file->fd, &st) == 0) {
448 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
455 lock.l_type = F_WRLCK; /* exclusive write lock */
456 lock.l_len = 0; /* whole file */
457 lock.l_start = 0; /* start of file */
458 lock.l_whence = SEEK_SET; /* l_start is relative to the start of the file */
460 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
468 /* drop cache except for the header and the active pages */
470 rrd_file_t *rrd_file,
473 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
474 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
475 size_t dontneed_start;
479 ssize_t _page_size = sysconf(_SC_PAGESIZE);
481 if (rrd_file == NULL) {
482 #if defined DEBUG && DEBUG
483 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
488 #if defined DEBUG && DEBUG > 1
489 mincore_print(rrd_file, "before");
492 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
493 rra_start = rrd_file->header_len;
494 dontneed_start = PAGE_START(rra_start) + _page_size;
495 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
498 + rrd->rra_ptr[i].cur_row
499 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
500 if (active_block > dontneed_start) {
502 madvise(rrd_simple_file->file_start + dontneed_start,
503 active_block - dontneed_start - 1, MADV_DONTNEED);
505 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
506 #ifdef HAVE_POSIX_FADVISE
507 posix_fadvise(rrd_simple_file->fd, dontneed_start,
508 active_block - dontneed_start - 1,
509 POSIX_FADV_DONTNEED);
512 dontneed_start = active_block;
513 /* do not release 'hot' block if update for this RRA will occur
514 * within 10 minutes */
515 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
516 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
517 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
518 dontneed_start += _page_size;
521 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
525 if (dontneed_start < rrd_file->file_len) {
527 madvise(rrd_simple_file->file_start + dontneed_start,
528 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
530 #ifdef HAVE_POSIX_FADVISE
531 posix_fadvise(rrd_simple_file->fd, dontneed_start,
532 rrd_file->file_len - dontneed_start,
533 POSIX_FADV_DONTNEED);
537 #if defined DEBUG && DEBUG > 1
538 mincore_print(rrd_file, "after");
540 #endif /* without madvise and posix_fadvise it does not make much sense todo anything */
548 rrd_file_t *rrd_file)
550 rrd_simple_file_t *rrd_simple_file;
551 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
555 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
557 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
558 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
560 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
562 ret = close(rrd_simple_file->fd);
564 rrd_set_error("closing file: %s", rrd_strerror(errno));
572 /* Set position of rrd_file. */
575 rrd_file_t *rrd_file,
580 rrd_simple_file_t *rrd_simple_file;
581 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
584 if (whence == SEEK_SET)
586 else if (whence == SEEK_CUR)
587 rrd_file->pos += off;
588 else if (whence == SEEK_END)
589 rrd_file->pos = rrd_file->file_len + off;
591 ret = lseek(rrd_simple_file->fd, off, whence);
593 rrd_set_error("lseek: %s", rrd_strerror(errno));
596 /* mimic fseek, which returns 0 upon success */
597 return ret < 0; /*XXX: or just ret to mimic lseek */
601 /* Get current position in rrd_file. */
604 rrd_file_t *rrd_file)
606 return rrd_file->pos;
610 /* Read count bytes into buffer buf, starting at rrd_file->pos.
611 * Returns the number of bytes read or <0 on error. */
614 rrd_file_t *rrd_file,
618 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
623 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
626 return -1; /* EINVAL */
627 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
628 if (_surplus > 0) { /* short read */
633 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
635 rrd_file->pos += _cnt; /* mimmic read() semantics */
640 ret = read(rrd_simple_file->fd, buf, count);
642 rrd_file->pos += ret; /* mimmic read() semantics */
648 /* Write count bytes from buffer buf to the current position
649 * rrd_file->pos of rrd_simple_file->fd.
650 * Returns the number of bytes written or <0 on error. */
653 rrd_file_t *rrd_file,
657 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
659 size_t old_size = rrd_file->file_len;
663 return -1; /* EINVAL */
665 if((rrd_file->pos + count) > old_size)
667 rrd_set_error("attempting to write beyond end of file");
670 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
671 rrd_file->pos += count;
672 return count; /* mimmic write() semantics */
674 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
677 rrd_file->pos += _sz;
683 /* this is a leftover from the old days, it serves no purpose
684 and is therefore turned into a no-op */
686 rrd_file_t *rrd_file __attribute__((unused)))
690 /* Initialize RRD header. */
695 rrd->stat_head = NULL;
698 rrd->live_head = NULL;
699 rrd->legacy_last_up = NULL;
701 rrd->pdp_prep = NULL;
702 rrd->cdp_prep = NULL;
703 rrd->rrd_value = NULL;
707 /* free RRD header data. */
713 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
714 free(rrd->live_head);
721 free(rrd->live_head);
722 free(rrd->stat_head);
728 free(rrd->rrd_value);
733 /* routine used by external libraries to free memory allocated by
743 * rra_update informs us about the RRAs being updated
744 * The low level storage API may use this information for
745 * aligning RRAs within stripes, or other performance enhancements
748 rrd_file_t *rrd_file __attribute__((unused)),
749 int rra_idx __attribute__((unused)),
750 unsigned long rra_row __attribute__((unused)),
751 time_t rra_time __attribute__((unused)))
756 * This function is called when creating a new RRD
757 * The storage implementation can use this opportunity to select
758 * a sensible starting row within the file.
759 * The default implementation is random, to ensure that all RRAs
760 * don't change to a new disk block at the same time
762 unsigned long rrd_select_initial_row(
763 rrd_file_t *rrd_file __attribute__((unused)),
764 int rra_idx __attribute__((unused)),
768 return rrd_random() % rra->row_cnt;