1 /*****************************************************************************
2 * RRDtool 1.4.2 Copyright by Tobi Oetiker, 1997-2009
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
15 #ifdef HAVE_BROKEN_MS_ASYNC
16 #include <sys/types.h>
25 #define _LK_UNLCK 0 /* Unlock */
26 #define _LK_LOCK 1 /* Lock */
27 #define _LK_NBLCK 2 /* Non-blocking lock */
28 #define _LK_RLCK 3 /* Lock for read only */
29 #define _LK_NBRLCK 4 /* Non-blocking lock for read only */
32 #define LK_UNLCK _LK_UNLCK
33 #define LK_LOCK _LK_LOCK
34 #define LK_NBLCK _LK_NBLCK
35 #define LK_RLCK _LK_RLCK
36 #define LK_NBRLCK _LK_NBRLCK
39 /* DEBUG 2 prints information obtained via mincore(2) */
41 /* do not calculate exact madvise hints but assume 1 page for headers and
42 * set DONTNEED for the rest, which is assumed to be data */
43 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
44 * your syscalls are very slow */
47 /* the cast to void* is there to avoid this warning seen on ia64 with certain
48 versions of gcc: 'cast increases required alignment of target type'
50 #define __rrd_read(dst, dst_t, cnt) { \
51 size_t wanted = sizeof(dst_t)*(cnt); \
52 if (offset + wanted > rrd_file->file_len) { \
53 rrd_set_error("reached EOF while loading header " #dst); \
54 goto out_nullify_head; \
56 (dst) = (dst_t*)(void*) (data + offset); \
60 #define __rrd_read(dst, dst_t, cnt) { \
61 size_t wanted = sizeof(dst_t)*(cnt); \
63 if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
64 rrd_set_error(#dst " malloc"); \
65 goto out_nullify_head; \
67 got = read (rrd_simple_file->fd, dst, wanted); \
68 if (got != wanted) { \
69 rrd_set_error("short read while reading header " #dst); \
70 goto out_nullify_head; \
76 /* get the address of the start of this page */
77 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
79 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
83 /* Open a database file, return its header and an open filehandle,
84 * positioned to the first cdp in the first rra.
85 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
86 * before returning an error. Do not call rrd_close upon failure of rrd_open.
87 * If creating a new file, the parameter rrd must be initialised with
88 * details of the file content.
89 * If opening an existing file, then rrd must be initialised by
90 * rrd_init(rrd) prior to invoking rrd_open
94 const char *const file_name,
103 ssize_t _page_size = sysconf(_SC_PAGESIZE);
104 char *data = MAP_FAILED;
108 rrd_file_t *rrd_file = NULL;
109 rrd_simple_file_t *rrd_simple_file = NULL;
110 size_t newfile_size = 0;
111 size_t header_len, value_cnt, data_len;
113 /* Are we creating a new file? */
114 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
116 header_len = rrd_get_header_size(rrd);
119 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
120 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
122 data_len = sizeof(rrd_value_t) * value_cnt;
124 newfile_size = header_len + data_len;
127 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
128 if (rrd_file == NULL) {
129 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
132 memset(rrd_file, 0, sizeof(rrd_file_t));
134 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
135 if(rrd_file->pvt == NULL) {
136 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
139 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
140 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
143 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
144 (RRD_READONLY | RRD_READWRITE)) {
145 /* Both READONLY and READWRITE were given, which is invalid. */
146 rrd_set_error("in read/write request mask");
152 rrd_simple_file->mm_prot = PROT_READ;
153 rrd_simple_file->mm_flags = 0;
156 if (rdwr & RRD_READONLY) {
159 rrd_simple_file->mm_flags = MAP_PRIVATE;
160 # ifdef MAP_NORESERVE
161 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
165 if (rdwr & RRD_READWRITE) {
168 rrd_simple_file->mm_flags = MAP_SHARED;
169 rrd_simple_file->mm_prot |= PROT_WRITE;
172 if (rdwr & RRD_CREAT) {
173 flags |= (O_CREAT | O_TRUNC);
175 if (rdwr & RRD_EXCL) {
179 if (rdwr & RRD_READAHEAD) {
181 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
183 #if defined MAP_NONBLOCK
184 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
187 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
191 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
192 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
197 #ifdef HAVE_BROKEN_MS_ASYNC
198 if (rdwr & RRD_READWRITE) {
199 /* on some unices, the file's mtime does not get updated
200 on msync MS_ASYNC; in order to help them,
201 we update the timestamp at this point.
202 The thing happens pretty 'close' to the open
203 call so the chances of a race should be minimal.
205 Maybe ask your vendor to fix your OS ... */
206 utime(file_name,NULL);
211 /* Better try to avoid seeks as much as possible. stat may be heavy but
212 * many concurrent seeks are even worse. */
213 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
214 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
217 if (newfile_size == 0) {
218 rrd_file->file_len = statb.st_size;
220 rrd_file->file_len = newfile_size;
221 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
222 if ( write(rrd_simple_file->fd, "\0", 1) == -1){ /* poke */
223 rrd_set_error("write '%s': %s", file_name, rrd_strerror(errno));
226 lseek(rrd_simple_file->fd, 0, SEEK_SET);
228 #ifdef HAVE_POSIX_FADVISE
229 /* In general we need no read-ahead when dealing with rrd_files.
230 When we stop reading, it is highly unlikely that we start up again.
231 In this manner we actually save time and diskaccess (and buffer cache).
232 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
233 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
237 if (rdwr & RRD_READWRITE)
239 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
240 rrd_set_error("failed to disable the stream buffer\n");
247 data = mmap(0, rrd_file->file_len,
248 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
249 rrd_simple_file->fd, offset);
251 /* let's see if the first read worked */
252 if (data == MAP_FAILED) {
253 rrd_set_error("mmaping file '%s': %s", file_name,
254 rrd_strerror(errno));
257 rrd_simple_file->file_start = data;
258 if (rdwr & RRD_CREAT) {
259 memset(data, DNAN, newfile_size - 1);
263 if (rdwr & RRD_CREAT)
266 if (rdwr & RRD_COPY) {
267 /* We will read everything in a moment (copying) */
268 madvise(data, rrd_file->file_len, MADV_WILLNEED );
269 madvise(data, rrd_file->file_len, MADV_SEQUENTIAL );
271 /* We do not need to read anything in for the moment */
272 madvise(data, rrd_file->file_len, MADV_RANDOM);
273 /* the stat_head will be needed soonish, so hint accordingly */
274 madvise(data, sizeof(stat_head_t), MADV_WILLNEED);
275 madvise(data, sizeof(stat_head_t), MADV_RANDOM);
279 __rrd_read(rrd->stat_head, stat_head_t,
282 /* let's do some tests to see if we are on track ... */
283 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
284 rrd_set_error("'%s' is not an RRD file", file_name);
285 goto out_nullify_head;
288 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
289 rrd_set_error("This RRD was created on another architecture");
290 goto out_nullify_head;
293 version = atoi(rrd->stat_head->version);
295 if (version > atoi(RRD_VERSION)) {
296 rrd_set_error("can't handle RRD file version %s",
297 rrd->stat_head->version);
298 goto out_nullify_head;
300 #if defined USE_MADVISE
301 /* the ds_def will be needed soonish, so hint accordingly */
302 madvise(data + PAGE_START(offset),
303 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
305 __rrd_read(rrd->ds_def, ds_def_t,
306 rrd->stat_head->ds_cnt);
308 #if defined USE_MADVISE
309 /* the rra_def will be needed soonish, so hint accordingly */
310 madvise(data + PAGE_START(offset),
311 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
313 __rrd_read(rrd->rra_def, rra_def_t,
314 rrd->stat_head->rra_cnt);
316 /* handle different format for the live_head */
318 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
319 if (rrd->live_head == NULL) {
320 rrd_set_error("live_head_t malloc");
323 #if defined USE_MADVISE
324 /* the live_head will be needed soonish, so hint accordingly */
325 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
327 __rrd_read(rrd->legacy_last_up, time_t,
330 rrd->live_head->last_up = *rrd->legacy_last_up;
331 rrd->live_head->last_up_usec = 0;
333 #if defined USE_MADVISE
334 /* the live_head will be needed soonish, so hint accordingly */
335 madvise(data + PAGE_START(offset),
336 sizeof(live_head_t), MADV_WILLNEED);
338 __rrd_read(rrd->live_head, live_head_t,
341 __rrd_read(rrd->pdp_prep, pdp_prep_t,
342 rrd->stat_head->ds_cnt);
343 __rrd_read(rrd->cdp_prep, cdp_prep_t,
344 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
345 __rrd_read(rrd->rra_ptr, rra_ptr_t,
346 rrd->stat_head->rra_cnt);
348 rrd_file->header_len = offset;
349 rrd_file->pos = offset;
352 unsigned long row_cnt = 0;
354 for (ui=0; ui<rrd->stat_head->rra_cnt; ui++)
355 row_cnt += rrd->rra_def[ui].row_cnt;
357 size_t correct_len = rrd_file->header_len +
358 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
360 if (correct_len > rrd_file->file_len)
362 rrd_set_error("'%s' is too small (should be %ld bytes)",
363 file_name, (long long) correct_len);
364 goto out_nullify_head;
371 rrd->stat_head = NULL;
374 if (data != MAP_FAILED)
375 munmap(data, rrd_file->file_len);
378 close(rrd_simple_file->fd);
386 #if defined DEBUG && DEBUG > 1
387 /* Print list of in-core pages of the current rrd_file. */
390 rrd_file_t *rrd_file,
393 rrd_simple_file_t *rrd_simple_file;
394 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
396 /* pretty print blocks in core */
399 ssize_t _page_size = sysconf(_SC_PAGESIZE);
401 off = rrd_file->file_len +
402 ((rrd_file->file_len + _page_size - 1) / _page_size);
406 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
408 unsigned is_in = 0, was_in = 0;
410 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
411 is_in = vec[off] & 1; /* if lsb set then is core resident */
414 if (was_in != is_in) {
415 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
416 was_in ? "" : "not ", vec + prev, off - prev);
422 "%s: %sin core: %p len %ld\n", mark,
423 was_in ? "" : "not ", vec + prev, off - prev);
425 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
428 fprintf(stderr, "sorry mincore only works with mmap");
431 #endif /* defined DEBUG && DEBUG > 1 */
434 * get exclusive lock to whole file.
435 * lock gets removed when we close the file
437 * returns 0 on success
440 rrd_file_t *rrd_file)
443 rrd_simple_file_t *rrd_simple_file;
444 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
447 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
450 if (_fstat(rrd_simple_file->fd, &st) == 0) {
451 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
458 lock.l_type = F_WRLCK; /* exclusive write lock */
459 lock.l_len = 0; /* whole file */
460 lock.l_start = 0; /* start of file */
461 lock.l_whence = SEEK_SET; /* l_start is relative to the start of the file */
463 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
471 /* drop cache except for the header and the active pages */
473 rrd_file_t *rrd_file,
476 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
477 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
478 size_t dontneed_start;
482 ssize_t _page_size = sysconf(_SC_PAGESIZE);
484 if (rrd_file == NULL) {
485 #if defined DEBUG && DEBUG
486 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
491 #if defined DEBUG && DEBUG > 1
492 mincore_print(rrd_file, "before");
495 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
496 rra_start = rrd_file->header_len;
497 dontneed_start = PAGE_START(rra_start) + _page_size;
498 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
501 + rrd->rra_ptr[i].cur_row
502 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
503 if (active_block > dontneed_start) {
505 madvise(rrd_simple_file->file_start + dontneed_start,
506 active_block - dontneed_start - 1, MADV_DONTNEED);
508 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
509 #ifdef HAVE_POSIX_FADVISE
510 posix_fadvise(rrd_simple_file->fd, dontneed_start,
511 active_block - dontneed_start - 1,
512 POSIX_FADV_DONTNEED);
515 dontneed_start = active_block;
516 /* do not release 'hot' block if update for this RRA will occur
517 * within 10 minutes */
518 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
519 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
520 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
521 dontneed_start += _page_size;
524 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
528 if (dontneed_start < rrd_file->file_len) {
530 madvise(rrd_simple_file->file_start + dontneed_start,
531 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
533 #ifdef HAVE_POSIX_FADVISE
534 posix_fadvise(rrd_simple_file->fd, dontneed_start,
535 rrd_file->file_len - dontneed_start,
536 POSIX_FADV_DONTNEED);
540 #if defined DEBUG && DEBUG > 1
541 mincore_print(rrd_file, "after");
543 #endif /* without madvise and posix_fadvise it does not make much sense todo anything */
551 rrd_file_t *rrd_file)
553 rrd_simple_file_t *rrd_simple_file;
554 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
558 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
560 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
561 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
563 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
565 ret = close(rrd_simple_file->fd);
567 rrd_set_error("closing file: %s", rrd_strerror(errno));
575 /* Set position of rrd_file. */
578 rrd_file_t *rrd_file,
583 rrd_simple_file_t *rrd_simple_file;
584 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
587 if (whence == SEEK_SET)
589 else if (whence == SEEK_CUR)
590 rrd_file->pos += off;
591 else if (whence == SEEK_END)
592 rrd_file->pos = rrd_file->file_len + off;
594 ret = lseek(rrd_simple_file->fd, off, whence);
596 rrd_set_error("lseek: %s", rrd_strerror(errno));
599 /* mimic fseek, which returns 0 upon success */
600 return ret < 0; /*XXX: or just ret to mimic lseek */
604 /* Get current position in rrd_file. */
607 rrd_file_t *rrd_file)
609 return rrd_file->pos;
613 /* Read count bytes into buffer buf, starting at rrd_file->pos.
614 * Returns the number of bytes read or <0 on error. */
617 rrd_file_t *rrd_file,
621 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
626 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
629 return -1; /* EINVAL */
630 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
631 if (_surplus > 0) { /* short read */
636 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
638 rrd_file->pos += _cnt; /* mimmic read() semantics */
643 ret = read(rrd_simple_file->fd, buf, count);
645 rrd_file->pos += ret; /* mimmic read() semantics */
651 /* Write count bytes from buffer buf to the current position
652 * rrd_file->pos of rrd_simple_file->fd.
653 * Returns the number of bytes written or <0 on error. */
656 rrd_file_t *rrd_file,
660 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
662 size_t old_size = rrd_file->file_len;
666 return -1; /* EINVAL */
668 if((rrd_file->pos + count) > old_size)
670 rrd_set_error("attempting to write beyond end of file");
673 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
674 rrd_file->pos += count;
675 return count; /* mimmic write() semantics */
677 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
680 rrd_file->pos += _sz;
686 /* this is a leftover from the old days, it serves no purpose
687 and is therefore turned into a no-op */
689 rrd_file_t *rrd_file __attribute__((unused)))
693 /* Initialize RRD header. */
698 rrd->stat_head = NULL;
701 rrd->live_head = NULL;
702 rrd->legacy_last_up = NULL;
704 rrd->pdp_prep = NULL;
705 rrd->cdp_prep = NULL;
706 rrd->rrd_value = NULL;
710 /* free RRD header data. */
716 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
717 free(rrd->live_head);
724 free(rrd->live_head);
725 free(rrd->stat_head);
731 free(rrd->rrd_value);
736 /* routine used by external libraries to free memory allocated by
746 * rra_update informs us about the RRAs being updated
747 * The low level storage API may use this information for
748 * aligning RRAs within stripes, or other performance enhancements
751 rrd_file_t *rrd_file __attribute__((unused)),
752 int rra_idx __attribute__((unused)),
753 unsigned long rra_row __attribute__((unused)),
754 time_t rra_time __attribute__((unused)))
759 * This function is called when creating a new RRD
760 * The storage implementation can use this opportunity to select
761 * a sensible starting row within the file.
762 * The default implementation is random, to ensure that all RRAs
763 * don't change to a new disk block at the same time
765 unsigned long rrd_select_initial_row(
766 rrd_file_t *rrd_file __attribute__((unused)),
767 int rra_idx __attribute__((unused)),
771 return rrd_random() % rra->row_cnt;