1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
/* Lock-mode constants. _LK_NBLCK is the mode handed to _locking() in the
 * _WIN32 branch of the file-locking code further down in this file.
 * NOTE(review): the preprocessor guard around these definitions is not
 * visible here -- presumably they are fallbacks for platforms whose headers
 * do not provide them; confirm. */
19 #define _LK_UNLCK 0 /* Unlock */
20 #define _LK_LOCK 1 /* Lock */
21 #define _LK_NBLCK 2 /* Non-blocking lock */
22 #define _LK_RLCK 3 /* Lock for read only */
23 #define _LK_NBRLCK 4 /* Non-blocking lock for read only */
/* Aliases for the same modes without the leading underscore. */
26 #define LK_UNLCK _LK_UNLCK
27 #define LK_LOCK _LK_LOCK
28 #define LK_NBLCK _LK_NBLCK
29 #define LK_RLCK _LK_RLCK
30 #define LK_NBRLCK _LK_NBRLCK
33 /* DEBUG 2 prints information obtained via mincore(2) */
35 /* do not calculate exact madvise hints but assume 1 page for headers and
36 * set DONTNEED for the rest, which is assumed to be data */
37 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
38 * your syscalls are very slow */
41 /* the cast to void* is there to avoid this warning seen on ia64 with certain
42 versions of gcc: 'cast increases required alignment of target type'
44 #define __rrd_read(dst, dst_t, cnt) { \
45 size_t wanted = sizeof(dst_t)*(cnt); \
46 if (offset + wanted > rrd_file->file_len) { \
47 rrd_set_error("reached EOF while loading header " #dst); \
48 goto out_nullify_head; \
50 (dst) = (dst_t*)(void*) (data + offset); \
54 #define __rrd_read(dst, dst_t, cnt) { \
55 size_t wanted = sizeof(dst_t)*(cnt); \
57 if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
58 rrd_set_error(#dst " malloc"); \
59 goto out_nullify_head; \
61 got = read (rrd_simple_file->fd, dst, wanted); \
62 if (got != wanted) { \
63 rrd_set_error("short read while reading header " #dst); \
64 goto out_nullify_head; \
70 /* get the address of the start of this page */
71 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
/* Rounds addr down to a multiple of _page_size. The expansion refers to a
 * variable named _page_size, which therefore has to be in scope wherever the
 * macro is used, and the mask is only correct when _page_size is a power of
 * two. */
73 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
77 /* Open a database file, return its header and an open filehandle,
78 * positioned to the first cdp in the first rra.
79 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
80 * before returning an error. Do not call rrd_close upon failure of rrd_open.
81 * If creating a new file, the parameter rrd must be initialised with
82 * details of the file content.
83 * If opening an existing file, then rrd must be initialised by
84 * rrd_init(rrd) prior to invoking rrd_open
 *
 * rdwr is a bit mask; the code below tests RRD_READONLY, RRD_READWRITE,
 * RRD_CREAT, RRD_READAHEAD and RRD_COPY. Asking for RRD_READONLY and
 * RRD_READWRITE at the same time is rejected with an error.
88 const char *const file_name,
97 ssize_t _page_size = sysconf(_SC_PAGESIZE);
98 char *data = MAP_FAILED;
102 rrd_file_t *rrd_file = NULL;
103 rrd_simple_file_t *rrd_simple_file = NULL;
104 size_t newfile_size = 0;
105 size_t header_len, value_cnt, data_len;
107 /* Are we creating a new file? */
/* For a new file the size is computed up front: the header size plus one
 * rrd_value_t per data source for every row of every RRA. */
108 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
110 header_len = rrd_get_header_size(rrd);
113 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
114 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
116 data_len = sizeof(rrd_value_t) * value_cnt;
118 newfile_size = header_len + data_len;
/* Allocate and zero the generic descriptor and its private part. */
121 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
122 if (rrd_file == NULL) {
123 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
126 memset(rrd_file, 0, sizeof(rrd_file_t));
128 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
129 if(rrd_file->pvt == NULL) {
130 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
133 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
134 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
137 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
138 (RRD_READONLY | RRD_READWRITE)) {
139 /* Both READONLY and READWRITE were given, which is invalid. */
140 rrd_set_error("in read/write request mask");
/* Mapping defaults: read-only protection and no flags; the request bits
 * tested below refine both. */
146 rrd_simple_file->mm_prot = PROT_READ;
147 rrd_simple_file->mm_flags = 0;
150 if (rdwr & RRD_READONLY) {
153 rrd_simple_file->mm_flags = MAP_PRIVATE;
154 # ifdef MAP_NORESERVE
155 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
159 if (rdwr & RRD_READWRITE) {
162 rrd_simple_file->mm_flags = MAP_SHARED;
163 rrd_simple_file->mm_prot |= PROT_WRITE;
166 if (rdwr & RRD_CREAT) {
167 flags |= (O_CREAT | O_TRUNC);
170 if (rdwr & RRD_READAHEAD) {
172 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
174 #if defined MAP_NONBLOCK
175 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
178 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
182 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
183 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
188 #ifdef HAVE_BROKEN_MS_ASYNC
189 if (rdwr & RRD_READWRITE) {
190 /* on some unices the mtime of the file does not get updated
191 on msync MS_ASYNC; in order to help them,
192 we update the timestamp at this point.
193 The thing happens pretty 'close' to the open
194 call so the chances of a race should be minimal.
196 Maybe ask your vendor to fix your OS ... */
197 utime(file_name,NULL);
202 /* Better try to avoid seeks as much as possible. stat may be heavy but
203 * many concurrent seeks are even worse. */
204 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
205 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
208 if (newfile_size == 0) {
209 rrd_file->file_len = statb.st_size;
211 rrd_file->file_len = newfile_size;
/* Extend the new file to its final size by writing one byte at the last
 * offset, then rewind.
 * NOTE(review): the results of lseek() and write() are ignored here, so a
 * failure to extend the file goes unnoticed at this point. */
212 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
213 write(rrd_simple_file->fd, "\0", 1); /* poke */
214 lseek(rrd_simple_file->fd, 0, SEEK_SET);
216 #ifdef HAVE_POSIX_FADVISE
217 /* In general we need no read-ahead when dealing with rrd_files.
218 When we stop reading, it is highly unlikely that we start up again.
219 In this manner we actually save time and diskaccess (and buffer cache).
220 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
221 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
/* NOTE(review): setvbuf() is declared for FILE * streams, but
 * rrd_simple_file->fd receives the result of open() above. The guard that
 * selects this code is not visible here -- confirm it is still compiled
 * anywhere. */
225 if (rdwr & RRD_READWRITE)
227 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
228 rrd_set_error("failed to disable the stream buffer\n");
235 data = mmap(0, rrd_file->file_len,
236 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
237 rrd_simple_file->fd, offset);
239 /* check whether the mmap() call worked */
240 if (data == MAP_FAILED) {
241 rrd_set_error("mmaping file '%s': %s", file_name,
242 rrd_strerror(errno));
245 rrd_simple_file->file_start = data;
246 if (rdwr & RRD_CREAT) {
/* memset() converts its fill argument to unsigned char and stores that byte
 * repeatedly. The length newfile_size - 1 leaves the final byte, the one
 * written by the poke above, untouched.
 * NOTE(review): DNAN is defined elsewhere; if it is a floating-point NaN
 * this call does not store NaN values of type rrd_value_t -- confirm which
 * fill is intended. */
247 memset(data, DNAN, newfile_size - 1);
251 if (rdwr & RRD_CREAT)
/* NOTE(review): advice constants are OR-ed together in the RRD_COPY hint and
 * in the stat_head hint below; madvise(2) documents advice as a single
 * value rather than a bit mask -- confirm the combined values mean what is
 * intended on the target platforms. */
254 if (rdwr & RRD_COPY) {
255 /* We will read everything in a moment (copying) */
256 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
258 /* We do not need to read anything in for the moment */
259 madvise(data, rrd_file->file_len, MADV_RANDOM);
260 /* the stat_head will be needed soonish, so hint accordingly */
261 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
265 __rrd_read(rrd->stat_head, stat_head_t,
268 /* do some tests to see if we are on track ... */
269 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
270 rrd_set_error("'%s' is not an RRD file", file_name);
271 goto out_nullify_head;
274 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
275 rrd_set_error("This RRD was created on another architecture");
276 goto out_nullify_head;
/* Files whose version number is higher than the RRD_VERSION of this build
 * are refused. */
279 version = atoi(rrd->stat_head->version);
281 if (version > atoi(RRD_VERSION)) {
282 rrd_set_error("can't handle RRD file version %s",
283 rrd->stat_head->version);
284 goto out_nullify_head;
286 #if defined USE_MADVISE
287 /* the ds_def will be needed soonish, so hint accordingly */
288 madvise(data + PAGE_START(offset),
289 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
291 __rrd_read(rrd->ds_def, ds_def_t,
292 rrd->stat_head->ds_cnt);
294 #if defined USE_MADVISE
295 /* the rra_def will be needed soonish, so hint accordingly */
296 madvise(data + PAGE_START(offset),
297 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
299 __rrd_read(rrd->rra_def, rra_def_t,
300 rrd->stat_head->rra_cnt);
302 /* handle different format for the live_head */
/* Legacy layout: only a time_t is read from the file (legacy_last_up);
 * live_head is allocated here and filled from it, with last_up_usec set
 * to 0. */
304 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
305 if (rrd->live_head == NULL) {
306 rrd_set_error("live_head_t malloc");
309 #if defined USE_MADVISE
310 /* the live_head will be needed soonish, so hint accordingly */
311 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
313 __rrd_read(rrd->legacy_last_up, time_t,
316 rrd->live_head->last_up = *rrd->legacy_last_up;
317 rrd->live_head->last_up_usec = 0;
319 #if defined USE_MADVISE
320 /* the live_head will be needed soonish, so hint accordingly */
321 madvise(data + PAGE_START(offset),
322 sizeof(live_head_t), MADV_WILLNEED);
324 __rrd_read(rrd->live_head, live_head_t,
327 __rrd_read(rrd->pdp_prep, pdp_prep_t,
328 rrd->stat_head->ds_cnt);
329 __rrd_read(rrd->cdp_prep, cdp_prep_t,
330 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
331 __rrd_read(rrd->rra_ptr, rra_ptr_t,
332 rrd->stat_head->rra_cnt);
/* Everything read so far is header: remember its length and leave the
 * position just behind it. */
334 rrd_file->header_len = offset;
335 rrd_file->pos = offset;
/* Verify that the file is large enough to hold the header plus every row of
 * every RRA. */
338 unsigned long row_cnt = 0;
340 for (ui=0; ui<rrd->stat_head->rra_cnt; ui++)
341 row_cnt += rrd->rra_def[ui].row_cnt;
343 size_t correct_len = rrd_file->header_len +
344 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
346 if (correct_len > rrd_file->file_len)
/* NOTE(review): the conversion %ld is paired with an argument cast to
 * long long; the specifier and the cast disagree. */
348 rrd_set_error("'%s' is too small (should be %ld bytes)",
349 file_name, (long long) correct_len);
350 goto out_nullify_head;
/* Failure cleanup (presumably the out_nullify_head target; the labels are
 * not visible here): forget the header pointer, unmap the file if it was
 * mapped, close the descriptor. */
357 rrd->stat_head = NULL;
360 if (data != MAP_FAILED)
361 munmap(data, rrd_file->file_len);
364 close(rrd_simple_file->fd);
372 #if defined DEBUG && DEBUG > 1
373 /* Print list of in-core pages of the current rrd_file. */
/* Only compiled when DEBUG is greater than 1. Each output line goes to
 * stderr and starts with the caller-supplied mark string. */
376 rrd_file_t *rrd_file,
379 rrd_simple_file_t *rrd_simple_file;
380 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
382 /* pretty print blocks in core */
385 ssize_t _page_size = sysconf(_SC_PAGESIZE);
387 off = rrd_file->file_len +
388 ((rrd_file->file_len + _page_size - 1) / _page_size);
392 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
394 unsigned is_in = 0, was_in = 0;
/* Walk the residency vector and report every run of entries that share the
 * same in-core state.
 * NOTE(review): off advances by one up to file_len and indexes vec[off],
 * while mincore(2) fills one vec entry per page of the range; entries beyond
 * the page count were not written by the call -- confirm the loop bound. */
396 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
397 is_in = vec[off] & 1; /* if lsb set then is core resident */
400 if (was_in != is_in) {
401 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
402 was_in ? "" : "not ", vec + prev, off - prev);
408 "%s: %sin core: %p len %ld\n", mark,
409 was_in ? "" : "not ", vec + prev, off - prev);
411 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
414 fprintf(stderr, "sorry mincore only works with mmap");
417 #endif /* defined DEBUG && DEBUG > 1 */
420 * get exclusive lock to whole file.
421 * lock gets removed when we close the file
423 * returns 0 on success
426 rrd_file_t *rrd_file)
429 rrd_simple_file_t *rrd_simple_file;
430 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
433 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
/* Native Windows: request a non-blocking lock (_LK_NBLCK) covering
 * st.st_size bytes, i.e. the size reported by _fstat(). */
436 if (_fstat(rrd_simple_file->fd, &st) == 0) {
437 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
/* fcntl() record lock: F_SETLK does not wait, it fails right away when the
 * lock cannot be granted. */
444 lock.l_type = F_WRLCK; /* exclusive write lock */
445 lock.l_len = 0; /* whole file */
446 lock.l_start = 0; /* start of file */
447 lock.l_whence = SEEK_SET; /* l_start is relative to the start of the file */
449 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
457 /* drop cache except for the header and the active pages */
459 rrd_file_t *rrd_file,
462 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
463 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
464 size_t dontneed_start;
468 ssize_t _page_size = sysconf(_SC_PAGESIZE);
/* NOTE(review): rrd_file has already been dereferenced in the initializer of
 * rrd_simple_file above, so this NULL check comes too late to protect that
 * access. */
470 if (rrd_file == NULL) {
471 #if defined DEBUG && DEBUG
472 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
477 #if defined DEBUG && DEBUG > 1
478 mincore_print(rrd_file, "before");
481 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
/* Start dropping at the first page boundary behind the page that contains
 * the end of the header. */
482 rra_start = rrd_file->header_len;
483 dontneed_start = PAGE_START(rra_start) + _page_size;
484 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
487 + rrd->rra_ptr[i].cur_row
488 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
/* Release everything between the previous keep point and the block that
 * holds the current row of this RRA. */
489 if (active_block > dontneed_start) {
491 madvise(rrd_simple_file->file_start + dontneed_start,
492 active_block - dontneed_start - 1, MADV_DONTNEED);
494 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
495 #ifdef HAVE_POSIX_FADVISE
496 posix_fadvise(rrd_simple_file->fd, dontneed_start,
497 active_block - dontneed_start - 1,
498 POSIX_FADV_DONTNEED);
501 dontneed_start = active_block;
502 /* do not release 'hot' block if update for this RRA will occur
503 * within 10 minutes */
504 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
505 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
506 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
507 dontneed_start += _page_size;
510 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
/* Whatever lies between the last keep point and the end of the file is
 * released as well. */
514 if (dontneed_start < rrd_file->file_len) {
516 madvise(rrd_simple_file->file_start + dontneed_start,
517 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
519 #ifdef HAVE_POSIX_FADVISE
520 posix_fadvise(rrd_simple_file->fd, dontneed_start,
521 rrd_file->file_len - dontneed_start,
522 POSIX_FADV_DONTNEED);
526 #if defined DEBUG && DEBUG > 1
527 mincore_print(rrd_file, "after");
529 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
537 rrd_file_t *rrd_file)
539 rrd_simple_file_t *rrd_simple_file;
540 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
/* Three steps, each with its own error message: schedule the mapping to be
 * written back (MS_ASYNC), unmap it, then close the descriptor.
 * NOTE(review): ret is assigned anew by every step; how an earlier failure
 * is carried to the caller is not visible here -- confirm. */
544 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
546 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
547 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
549 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
551 ret = close(rrd_simple_file->fd);
553 rrd_set_error("closing file: %s", rrd_strerror(errno));
561 /* Set position of rrd_file. whence is compared against SEEK_SET, SEEK_CUR
 * and SEEK_END. The result is 0 on success and 1 when the underlying
 * result is negative. */
564 rrd_file_t *rrd_file,
569 rrd_simple_file_t *rrd_simple_file;
570 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
/* Bookkeeping: move the cached position rrd_file->pos. */
573 if (whence == SEEK_SET)
575 else if (whence == SEEK_CUR)
576 rrd_file->pos += off;
577 else if (whence == SEEK_END)
578 rrd_file->pos = rrd_file->file_len + off;
/* Reposition the descriptor itself.
 * NOTE(review): presumably this and the bookkeeping above are alternative
 * code paths chosen by a preprocessor guard that is not visible here. */
580 ret = lseek(rrd_simple_file->fd, off, whence);
582 rrd_set_error("lseek: %s", rrd_strerror(errno));
585 /* mimic fseek, which returns 0 upon success */
586 return ret < 0; /*XXX: or just ret to mimic lseek */
590 /* Get current position in rrd_file: hands back the cached rrd_file->pos. */
593 rrd_file_t *rrd_file)
595 return rrd_file->pos;
599 /* Read count bytes into buffer buf, starting at rrd_file->pos.
600 * Returns the number of bytes read or <0 on error. */
603 rrd_file_t *rrd_file,
607 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
612 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
615 return -1; /* EINVAL */
/* _surplus is the number of bytes by which the request would run past
 * file_len; a positive value marks a short read. */
616 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
617 if (_surplus > 0) { /* short read */
/* Copy straight out of the memory starting at file_start. */
622 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
624 rrd_file->pos += _cnt; /* mimic read() semantics */
/* Descriptor path: read(2) from the fd and advance the cached position. */
629 ret = read(rrd_simple_file->fd, buf, count);
631 rrd_file->pos += ret; /* mimic read() semantics */
637 /* Write count bytes from buffer buf to the current position
638 * rrd_file->pos of rrd_simple_file->fd.
639 * Returns the number of bytes written or <0 on error. */
642 rrd_file_t *rrd_file,
646 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
648 size_t old_size = rrd_file->file_len;
652 return -1; /* EINVAL */
/* The memcpy path never grows the file: a write that would end past the
 * size recorded on entry is reported as an error. */
654 if((rrd_file->pos + count) > old_size)
656 rrd_set_error("attempting to write beyond end of file");
659 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
660 rrd_file->pos += count;
661 return count; /* mimic write() semantics */
/* Descriptor path: write(2) to the fd and advance the cached position by
 * the result. */
663 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
666 rrd_file->pos += _sz;
672 /* Initialize RRD header: the member pointers of rrd are reset to NULL. */
677 rrd->stat_head = NULL;
680 rrd->live_head = NULL;
681 rrd->legacy_last_up = NULL;
683 rrd->pdp_prep = NULL;
684 rrd->cdp_prep = NULL;
685 rrd->rrd_value = NULL;
689 /* free RRD header data. */
/* For old-format files the open code allocates live_head separately with
 * malloc() and fills it from legacy_last_up, so live_head is released on its
 * own in that case. */
695 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
696 free(rrd->live_head);
/* NOTE(review): presumably the releases below belong to the build variant
 * that reads the header into malloc()ed buffers; the guard is not visible
 * here. */
703 free(rrd->live_head);
704 free(rrd->stat_head);
710 free(rrd->rrd_value);
715 /* routine used by external libraries to free memory allocated by
725 * rra_update informs us about the RRAs being updated
726 * The low level storage API may use this information for
727 * aligning RRAs within stripes, or other performance enhancements
730 rrd_file_t *rrd_file __attribute__((unused)),
731 int rra_idx __attribute__((unused)),
732 unsigned long rra_row __attribute__((unused)),
733 time_t rra_time __attribute__((unused)))
738 * This function is called when creating a new RRD
739 * The storage implementation can use this opportunity to select
740 * a sensible starting row within the file.
741 * The default implementation is random, to ensure that all RRAs
742 * don't change to a new disk block at the same time
 * The value returned is rrd_random() reduced modulo rra->row_cnt.
 * NOTE(review): a row_cnt of 0 would make that modulo divide by zero;
 * confirm that callers never pass such an RRA.
744 unsigned long rrd_select_initial_row(
745 rrd_file_t *rrd_file __attribute__((unused)),
746 int rra_idx __attribute__((unused)),
750 return rrd_random() % rra->row_cnt;