1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
/* Lock-mode codes for _locking(); the Windows-only locking path further down
 * in this file passes _LK_NBLCK to it.
 * NOTE(review): presumably defined here because the platform headers in use
 * do not provide them -- confirm against the surrounding #if. */
19 #define _LK_UNLCK 0 /* Unlock */
20 #define _LK_LOCK 1 /* Lock */
21 #define _LK_NBLCK 2 /* Non-blocking lock */
22 #define _LK_RLCK 3 /* Lock for read only */
23 #define _LK_NBRLCK 4 /* Non-blocking lock for read only */
/* Unprefixed aliases for the _LK_* lock-mode codes above. */
26 #define LK_UNLCK _LK_UNLCK
27 #define LK_LOCK _LK_LOCK
28 #define LK_NBLCK _LK_NBLCK
29 #define LK_RLCK _LK_RLCK
30 #define LK_NBRLCK _LK_NBRLCK
33 /* DEBUG 2 prints information obtained via mincore(2) */
35 /* do not calculate exact madvise hints but assume 1 page for headers and
36 * set DONTNEED for the rest, which is assumed to be data */
37 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
38 * your syscalls are very slow */
41 /* the cast to void* is there to avoid this warning seen on ia64 with certain
42 versions of gcc: 'cast increases required alignment of target type'
44 #define __rrd_read(dst, dst_t, cnt) { \
45 size_t wanted = sizeof(dst_t)*(cnt); \
46 if (offset + wanted > rrd_file->file_len) { \
47 rrd_set_error("reached EOF while loading header " #dst); \
48 goto out_nullify_head; \
50 (dst) = (dst_t*)(void*) (data + offset); \
54 #define __rrd_read(dst, dst_t, cnt) { \
55 size_t wanted = sizeof(dst_t)*(cnt); \
57 if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
58 rrd_set_error(#dst " malloc"); \
59 goto out_nullify_head; \
61 got = read (rrd_simple_file->fd, dst, wanted); \
62 if (got != wanted) { \
63 rrd_set_error("short read while reading header " #dst); \
64 goto out_nullify_head; \
70 /* get the address of the start of this page */
71 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
73 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
77 /* Open a database file, return its header and an open filehandle,
78 * positioned to the first cdp in the first rra.
79 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
80 * before returning an error. Do not call rrd_close upon failure of rrd_open.
81 * If creating a new file, the parameter rrd must be initialised with
82 * details of the file content.
83 * If opening an existing file, then rrd must be initialised by
84 * rrd_init(rrd) prior to invoking rrd_open
88 const char *const file_name,
97 ssize_t _page_size = sysconf(_SC_PAGESIZE);
98 char *data = MAP_FAILED;
102 rrd_file_t *rrd_file = NULL;
103 rrd_simple_file_t *rrd_simple_file = NULL;
104 size_t newfile_size = 0;
105 size_t header_len, value_cnt, data_len;
107 /* Are we creating a new file? */
108 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
111 sizeof(stat_head_t) + \
112 sizeof(ds_def_t) * rrd->stat_head->ds_cnt + \
113 sizeof(rra_def_t) * rrd->stat_head->rra_cnt + \
115 sizeof(live_head_t) + \
116 sizeof(pdp_prep_t) * rrd->stat_head->ds_cnt + \
117 sizeof(cdp_prep_t) * rrd->stat_head->ds_cnt * rrd->stat_head->rra_cnt + \
118 sizeof(rra_ptr_t) * rrd->stat_head->rra_cnt;
121 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
122 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
124 data_len = sizeof(rrd_value_t) * value_cnt;
126 newfile_size = header_len + data_len;
129 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
130 if (rrd_file == NULL) {
131 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
134 memset(rrd_file, 0, sizeof(rrd_file_t));
136 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
137 if(rrd_file->pvt == NULL) {
138 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
141 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
142 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
145 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
146 (RRD_READONLY | RRD_READWRITE)) {
147 /* Both READONLY and READWRITE were given, which is invalid. */
148 rrd_set_error("in read/write request mask");
154 rrd_simple_file->mm_prot = PROT_READ;
155 rrd_simple_file->mm_flags = 0;
158 if (rdwr & RRD_READONLY) {
161 rrd_simple_file->mm_flags = MAP_PRIVATE;
162 # ifdef MAP_NORESERVE
163 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
167 if (rdwr & RRD_READWRITE) {
170 rrd_simple_file->mm_flags = MAP_SHARED;
171 rrd_simple_file->mm_prot |= PROT_WRITE;
174 if (rdwr & RRD_CREAT) {
175 flags |= (O_CREAT | O_TRUNC);
178 if (rdwr & RRD_READAHEAD) {
180 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
182 #if defined MAP_NONBLOCK
183 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
186 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
190 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
191 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
196 #ifdef HAVE_BROKEN_MS_ASYNC
197 if (rdwr & RRD_READWRITE) {
198 /* on some unices, the file's mtime does not get updated
199 on msync MS_ASYNC; in order to help them,
200 we update the timestamp at this point.
201 The thing happens pretty 'close' to the open
202 call so the chances of a race should be minimal.
204 Maybe ask your vendor to fix your OS ... */
205 utime(file_name,NULL);
210 /* Better try to avoid seeks as much as possible. stat may be heavy but
211 * many concurrent seeks are even worse. */
212 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
213 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
216 if (newfile_size == 0) {
217 rrd_file->file_len = statb.st_size;
219 rrd_file->file_len = newfile_size;
220 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
221 write(rrd_simple_file->fd, "\0", 1); /* poke */
222 lseek(rrd_simple_file->fd, 0, SEEK_SET);
224 #ifdef HAVE_POSIX_FADVISE
225 /* In general we need no read-ahead when dealing with rrd_files.
226 When we stop reading, it is highly unlikely that we start up again.
227 In this manner we actually save time and disk access (and buffer cache).
228 Thanks to Dave Plonka for the idea of using POSIX_FADV_RANDOM here. */
229 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
233 if (rdwr & RRD_READWRITE)
235 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
236 rrd_set_error("failed to disable the stream buffer\n");
243 data = mmap(0, rrd_file->file_len,
244 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
245 rrd_simple_file->fd, offset);
247 /* lets see if the first read worked */
248 if (data == MAP_FAILED) {
249 rrd_set_error("mmaping file '%s': %s", file_name,
250 rrd_strerror(errno));
253 rrd_simple_file->file_start = data;
254 if (rdwr & RRD_CREAT) {
255 memset(data, DNAN, newfile_size - 1);
259 if (rdwr & RRD_CREAT)
262 if (rdwr & RRD_COPY) {
263 /* We will read everything in a moment (copying) */
264 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
266 /* We do not need to read anything in for the moment */
267 madvise(data, rrd_file->file_len, MADV_RANDOM);
268 /* the stat_head will be needed soonish, so hint accordingly */
269 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
273 __rrd_read(rrd->stat_head, stat_head_t,
276 /* lets do some test if we are on track ... */
277 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
278 rrd_set_error("'%s' is not an RRD file", file_name);
279 goto out_nullify_head;
282 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
283 rrd_set_error("This RRD was created on another architecture");
284 goto out_nullify_head;
287 version = atoi(rrd->stat_head->version);
289 if (version > atoi(RRD_VERSION)) {
290 rrd_set_error("can't handle RRD file version %s",
291 rrd->stat_head->version);
292 goto out_nullify_head;
294 #if defined USE_MADVISE
295 /* the ds_def will be needed soonish, so hint accordingly */
296 madvise(data + PAGE_START(offset),
297 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
299 __rrd_read(rrd->ds_def, ds_def_t,
300 rrd->stat_head->ds_cnt);
302 #if defined USE_MADVISE
303 /* the rra_def will be needed soonish, so hint accordingly */
304 madvise(data + PAGE_START(offset),
305 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
307 __rrd_read(rrd->rra_def, rra_def_t,
308 rrd->stat_head->rra_cnt);
310 /* handle different format for the live_head */
312 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
313 if (rrd->live_head == NULL) {
314 rrd_set_error("live_head_t malloc");
317 #if defined USE_MADVISE
318 /* the live_head will be needed soonish, so hint accordingly */
319 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
321 __rrd_read(rrd->legacy_last_up, time_t,
324 rrd->live_head->last_up = *rrd->legacy_last_up;
325 rrd->live_head->last_up_usec = 0;
327 #if defined USE_MADVISE
328 /* the live_head will be needed soonish, so hint accordingly */
329 madvise(data + PAGE_START(offset),
330 sizeof(live_head_t), MADV_WILLNEED);
332 __rrd_read(rrd->live_head, live_head_t,
335 __rrd_read(rrd->pdp_prep, pdp_prep_t,
336 rrd->stat_head->ds_cnt);
337 __rrd_read(rrd->cdp_prep, cdp_prep_t,
338 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
339 __rrd_read(rrd->rra_ptr, rra_ptr_t,
340 rrd->stat_head->rra_cnt);
342 rrd_file->header_len = offset;
343 rrd_file->pos = offset;
346 unsigned long row_cnt = 0;
348 for (ui=0; ui<rrd->stat_head->rra_cnt; ui++)
349 row_cnt += rrd->rra_def[ui].row_cnt;
351 size_t correct_len = rrd_file->header_len +
352 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
354 if (correct_len > rrd_file->file_len)
356 rrd_set_error("'%s' is too small (should be %ld bytes)",
357 file_name, (long long) correct_len);
358 goto out_nullify_head;
365 rrd->stat_head = NULL;
368 if (data != MAP_FAILED)
369 munmap(data, rrd_file->file_len);
372 close(rrd_simple_file->fd);
380 #if defined DEBUG && DEBUG > 1
381 /* Print list of in-core pages of the current rrd_file. */
384 rrd_file_t *rrd_file,
387 rrd_simple_file_t *rrd_simple_file;
388 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
390 /* pretty print blocks in core */
393 ssize_t _page_size = sysconf(_SC_PAGESIZE);
395 off = rrd_file->file_len +
396 ((rrd_file->file_len + _page_size - 1) / _page_size);
400 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
402 unsigned is_in = 0, was_in = 0;
404 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
405 is_in = vec[off] & 1; /* if lsb set then is core resident */
408 if (was_in != is_in) {
409 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
410 was_in ? "" : "not ", vec + prev, off - prev);
416 "%s: %sin core: %p len %ld\n", mark,
417 was_in ? "" : "not ", vec + prev, off - prev);
419 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
422 fprintf(stderr, "sorry mincore only works with mmap");
425 #endif /* defined DEBUG && DEBUG > 1 */
428 * get exclusive lock to whole file.
429 * lock gets removed when we close the file
431 * returns 0 on success
434 rrd_file_t *rrd_file)
437 rrd_simple_file_t *rrd_simple_file;
438 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
441 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
444 if (_fstat(rrd_simple_file->fd, &st) == 0) {
445 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
452 lock.l_type = F_WRLCK; /* exclusive write lock */
453 lock.l_len = 0; /* whole file */
454 lock.l_start = 0; /* start of file */
455 lock.l_whence = SEEK_SET; /* l_start is relative to the start of the file */
457 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
465 /* drop cache except for the header and the active pages */
467 rrd_file_t *rrd_file,
470 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
471 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
472 size_t dontneed_start;
476 ssize_t _page_size = sysconf(_SC_PAGESIZE);
478 if (rrd_file == NULL) {
479 #if defined DEBUG && DEBUG
480 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
485 #if defined DEBUG && DEBUG > 1
486 mincore_print(rrd_file, "before");
489 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
490 rra_start = rrd_file->header_len;
491 dontneed_start = PAGE_START(rra_start) + _page_size;
492 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
495 + rrd->rra_ptr[i].cur_row
496 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
497 if (active_block > dontneed_start) {
499 madvise(rrd_simple_file->file_start + dontneed_start,
500 active_block - dontneed_start - 1, MADV_DONTNEED);
502 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
503 #ifdef HAVE_POSIX_FADVISE
504 posix_fadvise(rrd_simple_file->fd, dontneed_start,
505 active_block - dontneed_start - 1,
506 POSIX_FADV_DONTNEED);
509 dontneed_start = active_block;
510 /* do not release 'hot' block if update for this RRA will occur
511 * within 10 minutes */
512 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
513 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
514 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
515 dontneed_start += _page_size;
518 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
522 if (dontneed_start < rrd_file->file_len) {
524 madvise(rrd_simple_file->file_start + dontneed_start,
525 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
527 #ifdef HAVE_POSIX_FADVISE
528 posix_fadvise(rrd_simple_file->fd, dontneed_start,
529 rrd_file->file_len - dontneed_start,
530 POSIX_FADV_DONTNEED);
534 #if defined DEBUG && DEBUG > 1
535 mincore_print(rrd_file, "after");
537 #endif /* without madvise and posix_fadvise it does not make much sense todo anything */
545 rrd_file_t *rrd_file)
547 rrd_simple_file_t *rrd_simple_file;
548 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
552 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
554 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
555 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
557 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
559 ret = close(rrd_simple_file->fd);
561 rrd_set_error("closing file: %s", rrd_strerror(errno));
569 /* Set position of rrd_file. */
572 rrd_file_t *rrd_file,
577 rrd_simple_file_t *rrd_simple_file;
578 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
581 if (whence == SEEK_SET)
583 else if (whence == SEEK_CUR)
584 rrd_file->pos += off;
585 else if (whence == SEEK_END)
586 rrd_file->pos = rrd_file->file_len + off;
588 ret = lseek(rrd_simple_file->fd, off, whence);
590 rrd_set_error("lseek: %s", rrd_strerror(errno));
593 /* mimic fseek, which returns 0 upon success */
594 return ret < 0; /*XXX: or just ret to mimic lseek */
598 /* Get current position in rrd_file. */
601 rrd_file_t *rrd_file)
603 return rrd_file->pos;
607 /* Read count bytes into buffer buf, starting at rrd_file->pos.
608 * Returns the number of bytes read or <0 on error. */
611 rrd_file_t *rrd_file,
615 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
620 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
623 return -1; /* EINVAL */
624 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
625 if (_surplus > 0) { /* short read */
630 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
632 rrd_file->pos += _cnt; /* mimmic read() semantics */
637 ret = read(rrd_simple_file->fd, buf, count);
639 rrd_file->pos += ret; /* mimmic read() semantics */
645 /* Write count bytes from buffer buf to the current position
646 * rrd_file->pos of rrd_simple_file->fd.
647 * Returns the number of bytes written or <0 on error. */
650 rrd_file_t *rrd_file,
654 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
656 size_t old_size = rrd_file->file_len;
660 return -1; /* EINVAL */
662 if((rrd_file->pos + count) > old_size)
664 rrd_set_error("attempting to write beyond end of file");
667 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
668 rrd_file->pos += count;
669 return count; /* mimmic write() semantics */
671 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
674 rrd_file->pos += _sz;
680 /* Initialize RRD header. */
685 rrd->stat_head = NULL;
688 rrd->live_head = NULL;
689 rrd->legacy_last_up = NULL;
691 rrd->pdp_prep = NULL;
692 rrd->cdp_prep = NULL;
693 rrd->rrd_value = NULL;
697 /* free RRD header data. */
703 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
704 free(rrd->live_head);
711 free(rrd->live_head);
712 free(rrd->stat_head);
718 free(rrd->rrd_value);
723 /* routine used by external libraries to free memory allocated by
733 * rra_update informs us about the RRAs being updated
734 * The low level storage API may use this information for
735 * aligning RRAs within stripes, or other performance enhancements
738 rrd_file_t *rrd_file __attribute__((unused)),
739 int rra_idx __attribute__((unused)),
740 unsigned long rra_row __attribute__((unused)),
741 time_t rra_time __attribute__((unused)))
746 * This function is called when creating a new RRD
747 * The storage implementation can use this opportunity to select
748 * a sensible starting row within the file.
749 * The default implementation is random, so that the RRAs do not
750 * all move to a new disk block at the same time
752 unsigned long rrd_select_initial_row(
753 rrd_file_t *rrd_file __attribute__((unused)),
754 int rra_idx __attribute__((unused)),
758 return rrd_random() % rra->row_cnt;