1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
19 # define random() rand()
20 # define srandom(x) srand(x)
23 #define _LK_UNLCK 0 /* Unlock */
24 #define _LK_LOCK 1 /* Lock */
25 #define _LK_NBLCK 2 /* Non-blocking lock */
26 #define _LK_RLCK 3 /* Lock for read only */
27 #define _LK_NBRLCK 4 /* Non-blocking lock for read only */
30 #define LK_UNLCK _LK_UNLCK
31 #define LK_LOCK _LK_LOCK
32 #define LK_NBLCK _LK_NBLCK
33 #define LK_RLCK _LK_RLCK
34 #define LK_NBRLCK _LK_NBRLCK
37 /* DEBUG 2 prints information obtained via mincore(2) */
39 /* do not calculate exact madvise hints but assume 1 page for headers and
40 * set DONTNEED for the rest, which is assumed to be data */
41 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
42 * your syscalls are very slow */
45 /* the cast to void* is there to avoid this warning seen on ia64 with certain
46 versions of gcc: 'cast increases required alignment of target type'
48 #define __rrd_read(dst, dst_t, cnt) { \
49 size_t wanted = sizeof(dst_t)*(cnt); \
50 if (offset + wanted > rrd_file->file_len) { \
51 rrd_set_error("reached EOF while loading header " #dst); \
52 goto out_nullify_head; \
54 (dst) = (dst_t*)(void*) (data + offset); \
58 #define __rrd_read(dst, dst_t, cnt) { \
59 size_t wanted = sizeof(dst_t)*(cnt); \
61 if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
62 rrd_set_error(#dst " malloc"); \
63 goto out_nullify_head; \
65 got = read (rrd_simple_file->fd, dst, wanted); \
66 if (got != wanted) { \
67 rrd_set_error("short read while reading header " #dst); \
68 goto out_nullify_head; \
74 /* get the address of the start of this page */
75 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
77 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
81 long int rra_random_row(
85 /* Open a database file, return its header and an open filehandle,
86 * positioned to the first cdp in the first rra.
87 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
88 * before returning an error. Do not call rrd_close upon failure of rrd_open.
89 * If creating a new file, the parameter rrd must be initialised with
90 * details of the file content.
91 * If opening an existing file, then rrd must be initialised by
92 * rrd_init(rrd) prior to invoking rrd_open
96 const char *const file_name,
105 ssize_t _page_size = sysconf(_SC_PAGESIZE);
106 char *data = MAP_FAILED;
110 rrd_file_t *rrd_file = NULL;
111 rrd_simple_file_t *rrd_simple_file = NULL;
112 size_t newfile_size = 0;
113 size_t header_len, value_cnt, data_len;
115 /* Are we creating a new file? */
116 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
119 sizeof(stat_head_t) + \
120 sizeof(ds_def_t) * rrd->stat_head->ds_cnt + \
121 sizeof(rra_def_t) * rrd->stat_head->rra_cnt + \
123 sizeof(live_head_t) + \
124 sizeof(pdp_prep_t) * rrd->stat_head->ds_cnt + \
125 sizeof(cdp_prep_t) * rrd->stat_head->ds_cnt * rrd->stat_head->rra_cnt + \
126 sizeof(rra_ptr_t) * rrd->stat_head->rra_cnt;
129 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
130 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
132 data_len = sizeof(rrd_value_t) * value_cnt;
134 newfile_size = header_len + data_len;
137 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
138 if (rrd_file == NULL) {
139 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
142 memset(rrd_file, 0, sizeof(rrd_file_t));
144 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
145 if(rrd_file->pvt == NULL) {
146 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
149 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
150 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
153 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
154 (RRD_READONLY | RRD_READWRITE)) {
155 /* Both READONLY and READWRITE were given, which is invalid. */
156 rrd_set_error("in read/write request mask");
162 rrd_simple_file->mm_prot = PROT_READ;
163 rrd_simple_file->mm_flags = 0;
166 if (rdwr & RRD_READONLY) {
169 rrd_simple_file->mm_flags = MAP_PRIVATE;
170 # ifdef MAP_NORESERVE
171 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
175 if (rdwr & RRD_READWRITE) {
178 rrd_simple_file->mm_flags = MAP_SHARED;
179 rrd_simple_file->mm_prot |= PROT_WRITE;
182 if (rdwr & RRD_CREAT) {
183 flags |= (O_CREAT | O_TRUNC);
186 if (rdwr & RRD_READAHEAD) {
188 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
190 #if defined MAP_NONBLOCK
191 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
194 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
198 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
199 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
204 #ifdef HAVE_BROKEN_MS_ASYNC
205 if (rdwr & RRD_READWRITE) {
206 /* On some unices, the file's mtime does not get updated
207 on msync MS_ASYNC; in order to help them,
208 we update the timestamp at this point.
209 This happens pretty 'close' to the open
210 call, so the chances of a race should be minimal.
212 Maybe ask your vendor to fix your OS ... */
213 utime(file_name,NULL);
218 /* Better try to avoid seeks as much as possible. stat may be heavy but
219 * many concurrent seeks are even worse. */
220 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
221 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
224 if (newfile_size == 0) {
225 rrd_file->file_len = statb.st_size;
227 rrd_file->file_len = newfile_size;
228 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
229 write(rrd_simple_file->fd, "\0", 1); /* poke */
230 lseek(rrd_simple_file->fd, 0, SEEK_SET);
232 #ifdef HAVE_POSIX_FADVISE
233 /* In general we need no read-ahead when dealing with rrd_files.
234 When we stop reading, it is highly unlikely that we start up again.
235 In this manner we actually save time and disk access (and buffer cache).
236 Thanks to Dave Plonka for the idea of using POSIX_FADV_RANDOM here. */
237 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
241 if (rdwr & RRD_READWRITE)
243 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
244 rrd_set_error("failed to disable the stream buffer\n");
251 data = mmap(0, rrd_file->file_len,
252 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
253 rrd_simple_file->fd, offset);
255 /* lets see if the first read worked */
256 if (data == MAP_FAILED) {
257 rrd_set_error("mmaping file '%s': %s", file_name,
258 rrd_strerror(errno));
261 rrd_simple_file->file_start = data;
262 if (rdwr & RRD_CREAT) {
263 memset(data, DNAN, newfile_size - 1);
267 if (rdwr & RRD_CREAT)
270 if (rdwr & RRD_COPY) {
271 /* We will read everything in a moment (copying) */
272 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
274 /* We do not need to read anything in for the moment */
275 madvise(data, rrd_file->file_len, MADV_RANDOM);
276 /* the stat_head will be needed soonish, so hint accordingly */
277 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
281 __rrd_read(rrd->stat_head, stat_head_t,
284 /* let's do some tests to see if we are on track ... */
285 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
286 rrd_set_error("'%s' is not an RRD file", file_name);
287 goto out_nullify_head;
290 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
291 rrd_set_error("This RRD was created on another architecture");
292 goto out_nullify_head;
295 version = atoi(rrd->stat_head->version);
297 if (version > atoi(RRD_VERSION)) {
298 rrd_set_error("can't handle RRD file version %s",
299 rrd->stat_head->version);
300 goto out_nullify_head;
302 #if defined USE_MADVISE
303 /* the ds_def will be needed soonish, so hint accordingly */
304 madvise(data + PAGE_START(offset),
305 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
307 __rrd_read(rrd->ds_def, ds_def_t,
308 rrd->stat_head->ds_cnt);
310 #if defined USE_MADVISE
311 /* the rra_def will be needed soonish, so hint accordingly */
312 madvise(data + PAGE_START(offset),
313 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
315 __rrd_read(rrd->rra_def, rra_def_t,
316 rrd->stat_head->rra_cnt);
318 /* handle different format for the live_head */
320 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
321 if (rrd->live_head == NULL) {
322 rrd_set_error("live_head_t malloc");
325 #if defined USE_MADVISE
326 /* the live_head will be needed soonish, so hint accordingly */
327 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
329 __rrd_read(rrd->legacy_last_up, time_t,
332 rrd->live_head->last_up = *rrd->legacy_last_up;
333 rrd->live_head->last_up_usec = 0;
335 #if defined USE_MADVISE
336 /* the live_head will be needed soonish, so hint accordingly */
337 madvise(data + PAGE_START(offset),
338 sizeof(live_head_t), MADV_WILLNEED);
340 __rrd_read(rrd->live_head, live_head_t,
343 __rrd_read(rrd->pdp_prep, pdp_prep_t,
344 rrd->stat_head->ds_cnt);
345 __rrd_read(rrd->cdp_prep, cdp_prep_t,
346 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
347 __rrd_read(rrd->rra_ptr, rra_ptr_t,
348 rrd->stat_head->rra_cnt);
350 rrd_file->header_len = offset;
351 rrd_file->pos = offset;
354 unsigned long row_cnt = 0;
356 for (ui=0; ui<rrd->stat_head->rra_cnt; ui++)
357 row_cnt += rrd->rra_def[ui].row_cnt;
359 size_t correct_len = rrd_file->header_len +
360 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
362 if (correct_len > rrd_file->file_len)
364 rrd_set_error("'%s' is too small (should be %ld bytes)",
365 file_name, (long long) correct_len);
366 goto out_nullify_head;
373 rrd->stat_head = NULL;
376 if (data != MAP_FAILED)
377 munmap(data, rrd_file->file_len);
380 close(rrd_simple_file->fd);
388 #if defined DEBUG && DEBUG > 1
389 /* Print list of in-core pages of the current rrd_file. */
392 rrd_file_t *rrd_file,
395 rrd_simple_file_t *rrd_simple_file;
396 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
398 /* pretty print blocks in core */
401 ssize_t _page_size = sysconf(_SC_PAGESIZE);
403 off = rrd_file->file_len +
404 ((rrd_file->file_len + _page_size - 1) / _page_size);
408 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
410 unsigned is_in = 0, was_in = 0;
412 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
413 is_in = vec[off] & 1; /* if lsb set then is core resident */
416 if (was_in != is_in) {
417 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
418 was_in ? "" : "not ", vec + prev, off - prev);
424 "%s: %sin core: %p len %ld\n", mark,
425 was_in ? "" : "not ", vec + prev, off - prev);
427 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
430 fprintf(stderr, "sorry mincore only works with mmap");
433 #endif /* defined DEBUG && DEBUG > 1 */
436 * get exclusive lock to whole file.
437 * lock gets removed when we close the file
439 * returns 0 on success
442 rrd_file_t *rrd_file)
445 rrd_simple_file_t *rrd_simple_file;
446 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
449 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
452 if (_fstat(rrd_simple_file->fd, &st) == 0) {
453 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
460 lock.l_type = F_WRLCK; /* exclusive write lock */
461 lock.l_len = 0; /* whole file */
462 lock.l_start = 0; /* start of file */
463 lock.l_whence = SEEK_SET; /* end of file */
465 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
473 /* drop cache except for the header and the active pages */
475 rrd_file_t *rrd_file,
478 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
479 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
480 size_t dontneed_start;
484 ssize_t _page_size = sysconf(_SC_PAGESIZE);
486 if (rrd_file == NULL) {
487 #if defined DEBUG && DEBUG
488 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
493 #if defined DEBUG && DEBUG > 1
494 mincore_print(rrd_file, "before");
497 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
498 rra_start = rrd_file->header_len;
499 dontneed_start = PAGE_START(rra_start) + _page_size;
500 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
503 + rrd->rra_ptr[i].cur_row
504 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
505 if (active_block > dontneed_start) {
507 madvise(rrd_simple_file->file_start + dontneed_start,
508 active_block - dontneed_start - 1, MADV_DONTNEED);
510 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
511 #ifdef HAVE_POSIX_FADVISE
512 posix_fadvise(rrd_simple_file->fd, dontneed_start,
513 active_block - dontneed_start - 1,
514 POSIX_FADV_DONTNEED);
517 dontneed_start = active_block;
518 /* do not release 'hot' block if update for this RRA will occur
519 * within 10 minutes */
520 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
521 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
522 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
523 dontneed_start += _page_size;
526 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
530 if (dontneed_start < rrd_file->file_len) {
532 madvise(rrd_simple_file->file_start + dontneed_start,
533 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
535 #ifdef HAVE_POSIX_FADVISE
536 posix_fadvise(rrd_simple_file->fd, dontneed_start,
537 rrd_file->file_len - dontneed_start,
538 POSIX_FADV_DONTNEED);
542 #if defined DEBUG && DEBUG > 1
543 mincore_print(rrd_file, "after");
545 #endif /* without madvise and posix_fadvise it does not make much sense todo anything */
553 rrd_file_t *rrd_file)
555 rrd_simple_file_t *rrd_simple_file;
556 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
560 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
562 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
563 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
565 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
567 ret = close(rrd_simple_file->fd);
569 rrd_set_error("closing file: %s", rrd_strerror(errno));
577 /* Set position of rrd_file. */
580 rrd_file_t *rrd_file,
585 rrd_simple_file_t *rrd_simple_file;
586 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
589 if (whence == SEEK_SET)
591 else if (whence == SEEK_CUR)
592 rrd_file->pos += off;
593 else if (whence == SEEK_END)
594 rrd_file->pos = rrd_file->file_len + off;
596 ret = lseek(rrd_simple_file->fd, off, whence);
598 rrd_set_error("lseek: %s", rrd_strerror(errno));
601 /* mimic fseek, which returns 0 upon success */
602 return ret < 0; /*XXX: or just ret to mimic lseek */
606 /* Get current position in rrd_file. */
609 rrd_file_t *rrd_file)
611 return rrd_file->pos;
615 /* Read count bytes into buffer buf, starting at rrd_file->pos.
616 * Returns the number of bytes read or <0 on error. */
619 rrd_file_t *rrd_file,
623 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
628 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
631 return -1; /* EINVAL */
632 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
633 if (_surplus > 0) { /* short read */
638 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
640 rrd_file->pos += _cnt; /* mimmic read() semantics */
645 ret = read(rrd_simple_file->fd, buf, count);
647 rrd_file->pos += ret; /* mimmic read() semantics */
653 /* Write count bytes from buffer buf to the current position
654 * rrd_file->pos of rrd_simple_file->fd.
655 * Returns the number of bytes written or <0 on error. */
658 rrd_file_t *rrd_file,
662 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
664 size_t old_size = rrd_file->file_len;
668 return -1; /* EINVAL */
670 if((rrd_file->pos + count) > old_size)
672 rrd_set_error("attempting to write beyond end of file");
675 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
676 rrd_file->pos += count;
677 return count; /* mimmic write() semantics */
679 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
682 rrd_file->pos += _sz;
688 /* Initialize RRD header. */
693 rrd->stat_head = NULL;
696 rrd->live_head = NULL;
697 rrd->legacy_last_up = NULL;
699 rrd->pdp_prep = NULL;
700 rrd->cdp_prep = NULL;
701 rrd->rrd_value = NULL;
705 /* free RRD header data. */
711 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
712 free(rrd->live_head);
719 free(rrd->live_head);
720 free(rrd->stat_head);
726 free(rrd->rrd_value);
731 /* routine used by external libraries to free memory allocated by
741 * rra_update informs us about the RRAs being updated
742 * The low level storage API may use this information for
743 * aligning RRAs within stripes, or other performance enhancements
746 rrd_file_t *rrd_file __attribute__((unused)),
747 int rra_idx __attribute__((unused)),
748 unsigned long rra_row __attribute__((unused)),
749 time_t rra_time __attribute__((unused)))
754 * This function is called when creating a new RRD
755 * The storage implementation can use this opportunity to select
756 * a sensible starting row within the file.
757 * The default implementation is random, to ensure that all RRAs
758 * don't change to a new disk block at the same time
760 unsigned long rrd_select_initial_row(
761 rrd_file_t *rrd_file __attribute__((unused)),
762 int rra_idx __attribute__((unused)),
766 return rra_random_row(rra);
769 static int rand_init = 0;
771 long int rra_random_row(
775 srandom((unsigned int) time(NULL) + (unsigned int) getpid());
779 return random() % rra->row_cnt;