1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
/* NOTE(review): lines appear to be missing from this listing around the two
 * __rrd_read variants below: the comment that follows has no visible
 * terminator, no preprocessor conditional selecting between the two variants
 * is visible, and neither macro body is visibly closed.  Confirm against the
 * complete file before editing.
 *
 * What the visible lines show: the first variant checks
 * offset + sizeof(dst_t)*(cnt) against rrd_file->file_len and then points dst
 * into the buffer data at offset; the second variant allocates dst with
 * malloc() and fills it with read() from rrd_simple_file->fd.  Both report
 * problems through rrd_set_error() and jump to out_nullify_head.  Both rely
 * on names from the expanding scope (offset, data, rrd_file,
 * rrd_simple_file, got). */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
24 #define __rrd_read(dst, dst_t, cnt) { \
25 size_t wanted = sizeof(dst_t)*(cnt); \
26 if (offset + wanted > rrd_file->file_len) { \
27 rrd_set_error("reached EOF while loading header " #dst); \
28 goto out_nullify_head; \
30 (dst) = (dst_t*)(void*) (data + offset); \
34 #define __rrd_read(dst, dst_t, cnt) { \
35 size_t wanted = sizeof(dst_t)*(cnt); \
37 if ((dst = malloc(wanted)) == NULL) { \
38 rrd_set_error(#dst " malloc"); \
39 goto out_nullify_head; \
41 got = read (rrd_simple_file->fd, dst, wanted); \
42 if (got != wanted) { \
43 rrd_set_error("short read while reading header " #dst); \
44 goto out_nullify_head; \
50 /* get the address of the start of this page */
51 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
/* Masks off the low bits of addr using _page_size, which must be a variable
 * in scope wherever the macro is expanded.  The result is the start of the
 * page only if _page_size is a power of two -- presumably always the case
 * for the value obtained from sysconf(_SC_PAGESIZE); confirm. */
53 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
/* NOTE(review): the function signature, several declarations, braces, labels
 * and error-return statements of this routine are not visible in this
 * listing, and the descriptive comment below has no visible terminator.  The
 * notes added further down describe only what the visible lines show. */
57 /* Open a database file, return its header and an open filehandle,
58 * positioned to the first cdp in the first rra.
59 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
60 * before returning an error. Do not call rrd_close upon failure of rrd_open.
61 * If creating a new file, the parameter rrd must be initialised with
62 * details of the file content.
63 * If opening an existing file, then rrd must be initialised by
64 * rrd_init(rrd) prior to invoking rrd_open
68 const char *const file_name,
74 mode_t mode = S_IRUSR;
78 ssize_t _page_size = sysconf(_SC_PAGESIZE);
79 char *data = MAP_FAILED;
83 rrd_file_t *rrd_file = NULL;
84 rrd_simple_file_t *rrd_simple_file = NULL;
85 off_t newfile_size = 0;
86 off_t header_len, value_cnt, data_len;
88 /* Are we creating a new file? */
89 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
92 sizeof(stat_head_t) + \
93 sizeof(ds_def_t) * rrd->stat_head->ds_cnt + \
94 sizeof(rra_def_t) * rrd->stat_head->rra_cnt + \
96 sizeof(live_head_t) + \
97 sizeof(pdp_prep_t) * rrd->stat_head->ds_cnt + \
98 sizeof(cdp_prep_t) * rrd->stat_head->ds_cnt * rrd->stat_head->rra_cnt + \
99 sizeof(rra_ptr_t) * rrd->stat_head->rra_cnt;
/* Count every stored value: ds_cnt values per row, over the rows of each
 * RRA.  The data area is that count times sizeof(rrd_value_t), and the size
 * of the new file is header plus data. */
102 for (i = 0; i < rrd->stat_head->rra_cnt; i++)
103 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[i].row_cnt;
105 data_len = sizeof(rrd_value_t) * value_cnt;
107 newfile_size = header_len + data_len;
/* Allocate and zero the generic handle and its private part; the private
 * part is used as an rrd_simple_file_t from here on. */
110 rrd_file = malloc(sizeof(rrd_file_t));
111 if (rrd_file == NULL) {
112 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
115 memset(rrd_file, 0, sizeof(rrd_file_t));
117 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
118 if(rrd_file->pvt == NULL) {
119 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
122 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
123 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
126 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
127 (RRD_READONLY | RRD_READWRITE)) {
128 /* Both READONLY and READWRITE were given, which is invalid. */
129 rrd_set_error("in read/write request mask");
/* Start from a read-only mapping with no flags; the rdwr bits tested below
 * select MAP_PRIVATE or MAP_SHARED and add PROT_WRITE for read/write use. */
135 rrd_simple_file->mm_prot = PROT_READ;
136 rrd_simple_file->mm_flags = 0;
139 if (rdwr & RRD_READONLY) {
142 rrd_simple_file->mm_flags = MAP_PRIVATE;
143 # ifdef MAP_NORESERVE
144 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
148 if (rdwr & RRD_READWRITE) {
152 rrd_simple_file->mm_flags = MAP_SHARED;
153 rrd_simple_file->mm_prot |= PROT_WRITE;
/* With RRD_CREAT the open() flags include O_TRUNC, so an existing file of
 * the same name is emptied. */
156 if (rdwr & RRD_CREAT) {
157 flags |= (O_CREAT | O_TRUNC);
160 if (rdwr & RRD_READAHEAD) {
162 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
164 #if defined MAP_NONBLOCK
165 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
168 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
172 if ((rrd_simple_file->fd = open(file_name, flags, mode)) < 0) {
173 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
177 /* Better try to avoid seeks as much as possible. stat may be heavy but
178 * many concurrent seeks are even worse. */
179 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
180 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
183 if (newfile_size == 0) {
184 rrd_file->file_len = statb.st_size;
186 rrd_file->file_len = newfile_size;
/* Extend the new file to newfile_size by writing one byte at its last
 * offset, then rewind.
 * NOTE(review): the return values of lseek() and write() are not checked in
 * the visible code, so a failure to extend the file goes unnoticed here. */
187 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
188 write(rrd_simple_file->fd, "\0", 1); /* poke */
189 lseek(rrd_simple_file->fd, 0, SEEK_SET);
191 #ifdef HAVE_POSIX_FADVISE
192 /* In general we need no read-ahead when dealing with rrd_files.
193 When we stop reading, it is highly unlikely that we start up again.
194 In this manner we actually save time and diskaccess (and buffer cache).
195 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
196 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
/* NOTE(review): setvbuf() takes a FILE pointer, while fd holds the value
 * returned by open() above.  Presumably this fragment is disabled in the
 * complete file; confirm. */
200 if (rdwr & RRD_READWRITE)
202 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
203 rrd_set_error("failed to disable the stream buffer\n");
210 data = mmap(0, rrd_file->file_len,
211 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
212 rrd_simple_file->fd, offset);
214 /* lets see if the first read worked */
215 if (data == MAP_FAILED) {
216 rrd_set_error("mmaping file '%s': %s", file_name,
217 rrd_strerror(errno));
220 rrd_simple_file->file_start = data;
/* NOTE(review): memset() uses its second argument as a single byte value.
 * DNAN is defined outside this listing -- presumably a floating-point NaN,
 * in which case this call does not store NaN doubles.  Confirm the intended
 * fill. */
221 if (rdwr & RRD_CREAT) {
222 memset(data, DNAN, newfile_size - 1);
226 if (rdwr & RRD_CREAT)
/* NOTE(review): madvise() advice values are documented as distinct constants
 * rather than bit flags, so OR-ing two of them (here and for stat_head
 * below) may not request both hints -- confirm against madvise(2). */
229 if (rdwr & RRD_COPY) {
230 /* We will read everything in a moment (copying) */
231 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
233 /* We do not need to read anything in for the moment */
234 madvise(data, rrd_file->file_len, MADV_RANDOM);
235 /* the stat_head will be needed soonish, so hint accordingly */
236 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
240 __rrd_read(rrd->stat_head, stat_head_t,
243 /* lets do some test if we are on track ... */
244 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
245 rrd_set_error("'%s' is not an RRD file", file_name);
246 goto out_nullify_head;
249 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
250 rrd_set_error("This RRD was created on another architecture");
251 goto out_nullify_head;
/* Reject files whose version field is numerically greater than RRD_VERSION;
 * both sides are parsed with atoi(). */
254 version = atoi(rrd->stat_head->version);
256 if (version > atoi(RRD_VERSION)) {
257 rrd_set_error("can't handle RRD file version %s",
258 rrd->stat_head->version);
259 goto out_nullify_head;
261 #if defined USE_MADVISE
262 /* the ds_def will be needed soonish, so hint accordingly */
263 madvise(data + PAGE_START(offset),
264 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
266 __rrd_read(rrd->ds_def, ds_def_t,
267 rrd->stat_head->ds_cnt);
269 #if defined USE_MADVISE
270 /* the rra_def will be needed soonish, so hint accordingly */
271 madvise(data + PAGE_START(offset),
272 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
274 __rrd_read(rrd->rra_def, rra_def_t,
275 rrd->stat_head->rra_cnt);
277 /* handle different format for the live_head */
279 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
280 if (rrd->live_head == NULL) {
281 rrd_set_error("live_head_t malloc");
284 #if defined USE_MADVISE
285 /* the live_head will be needed soonish, so hint accordingly */
286 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
288 __rrd_read(rrd->legacy_last_up, time_t,
/* In this branch only a time_t is read from the file; it is copied into the
 * separately allocated live_head and the microsecond part is set to zero. */
291 rrd->live_head->last_up = *rrd->legacy_last_up;
292 rrd->live_head->last_up_usec = 0;
294 #if defined USE_MADVISE
295 /* the live_head will be needed soonish, so hint accordingly */
296 madvise(data + PAGE_START(offset),
297 sizeof(live_head_t), MADV_WILLNEED);
299 __rrd_read(rrd->live_head, live_head_t,
302 __rrd_read(rrd->pdp_prep, pdp_prep_t,
303 rrd->stat_head->ds_cnt);
304 __rrd_read(rrd->cdp_prep, cdp_prep_t,
305 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
306 __rrd_read(rrd->rra_ptr, rra_ptr_t,
307 rrd->stat_head->rra_cnt);
/* The header ends at the current offset: record it as header_len and use it
 * as the initial file position. */
309 rrd_file->header_len = offset;
310 rrd_file->pos = offset;
313 unsigned long row_cnt = 0;
316 for (i=0; i<rrd->stat_head->rra_cnt; i++)
317 row_cnt += rrd->rra_def[i].row_cnt;
319 off_t correct_len = rrd_file->header_len +
320 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
/* The file must be at least header plus all rows of all RRAs.
 * NOTE(review): the message below formats a (long long) argument with %ld;
 * the conversion specifier and the argument type do not match. */
322 if (correct_len > rrd_file->file_len)
324 rrd_set_error("'%s' is too small (should be %ld bytes)",
325 file_name, (long long) correct_len);
326 goto out_nullify_head;
/* Cleanup statements (their labels are not visible in this listing): drop
 * the stat_head pointer, unmap if a mapping was established, and close the
 * descriptor. */
333 rrd->stat_head = NULL;
336 if (data != MAP_FAILED)
337 munmap(data, rrd_file->file_len);
339 close(rrd_simple_file->fd);
347 #if defined DEBUG && DEBUG > 1
348 /* Print list of in-core pages of the current rrd_file. */
/* Debug-only helper (compiled when DEBUG > 1): asks mincore() about the
 * mapping at rrd_simple_file->file_start and writes runs of resident and
 * non-resident entries to stderr, each line prefixed with mark.
 * NOTE(review): the signature, the vec allocation and several braces are not
 * visible in this listing. */
351 rrd_file_t *rrd_file,
354 rrd_simple_file_t *rrd_simple_file;
355 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
357 /* pretty print blocks in core */
360 ssize_t _page_size = sysconf(_SC_PAGESIZE);
/* NOTE(review): this is file_len plus the number of pages, not the number of
 * pages alone -- presumably the size used for the vec buffer; the allocation
 * itself is not visible here. */
362 off = rrd_file->file_len +
363 ((rrd_file->file_len + _page_size - 1) / _page_size);
367 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
369 unsigned is_in = 0, was_in = 0;
371 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
372 is_in = vec[off] & 1; /* if lsb set then is core resident */
/* Emit one line each time the residency state changes.
 * NOTE(review): the address printed with %p is a position inside vec, not
 * inside the mapped file -- confirm that this is intended. */
375 if (was_in != is_in) {
376 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
377 was_in ? "" : "not ", vec + prev, off - prev);
383 "%s: %sin core: %p len %ld\n", mark,
384 was_in ? "" : "not ", vec + prev, off - prev);
386 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
389 fprintf(stderr, "sorry mincore only works with mmap");
392 #endif /* defined DEBUG && DEBUG > 1 */
/* Locks the file behind rrd_file.  The visible lines show two variants: on
 * native Windows builds _locking() with _LK_NBLCK over st_size bytes, and an
 * fcntl(F_SETLK) request for an F_WRLCK lock starting at offset 0 with
 * length 0 -- presumably in an else branch that is not visible in this
 * listing.  The result of the locking call is kept in rcstat.
 * NOTE(review): the opening of the original comment, the signature and the
 * return statement are not visible here. */
395 * get exclusive lock to whole file.
396 * lock gets removed when we close the file
398 * returns 0 on success
401 rrd_file_t *rrd_file)
404 rrd_simple_file_t *rrd_simple_file;
405 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
408 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
411 if (_fstat(rrd_simple_file->fd, &st) == 0) {
412 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
419 lock.l_type = F_WRLCK; /* exclusive write lock */
420 lock.l_len = 0; /* whole file */
421 lock.l_start = 0; /* start of file */
422 lock.l_whence = SEEK_SET; /* l_start is measured from the start of the file */
424 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
432 /* drop cache except for the header and the active pages */
/* Walks the RRAs in order and issues MADV_DONTNEED / POSIX_FADV_DONTNEED for
 * the ranges between the header page and each active block, then for the
 * remainder of the file.  The return values of the advisory calls are not
 * used.
 * NOTE(review): the signature, some declarations and several closing lines
 * are not visible in this listing. */
434 rrd_file_t *rrd_file,
437 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
438 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
439 off_t dontneed_start;
443 ssize_t _page_size = sysconf(_SC_PAGESIZE);
/* NOTE(review): rrd_file has already been dereferenced by the initializer of
 * rrd_simple_file above, so this NULL check comes too late to protect that
 * access. */
445 if (rrd_file == NULL) {
446 #if defined DEBUG && DEBUG
447 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
452 #if defined DEBUG && DEBUG > 1
453 mincore_print(rrd_file, "before");
456 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
457 rra_start = rrd_file->header_len;
/* Begin releasing at the page after the one that contains the end of the
 * header. */
458 dontneed_start = PAGE_START(rra_start) + _page_size;
459 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
462 + rrd->rra_ptr[i].cur_row
463 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
464 if (active_block > dontneed_start) {
466 madvise(rrd_simple_file->file_start + dontneed_start,
467 active_block - dontneed_start - 1, MADV_DONTNEED);
469 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
470 #ifdef HAVE_POSIX_FADVISE
471 posix_fadvise(rrd_simple_file->fd, dontneed_start,
472 active_block - dontneed_start - 1,
473 POSIX_FADV_DONTNEED);
476 dontneed_start = active_block;
477 /* do not release 'hot' block if update for this RRA will occur
478 * within 10 minutes */
/* The left-hand side is the length of one consolidation period of this RRA
 * (pdp_step * pdp_cnt) minus the part of the current period that last_up has
 * already covered; it is compared against 10 * 60. */
479 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
480 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
481 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
482 dontneed_start += _page_size;
485 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
/* Whatever lies between the last kept block and the end of the file is
 * released as well. */
489 if (dontneed_start < rrd_file->file_len) {
491 madvise(rrd_simple_file->file_start + dontneed_start,
492 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
494 #ifdef HAVE_POSIX_FADVISE
495 posix_fadvise(rrd_simple_file->fd, dontneed_start,
496 rrd_file->file_len - dontneed_start,
497 POSIX_FADV_DONTNEED);
501 #if defined DEBUG && DEBUG > 1
502 mincore_print(rrd_file, "after");
504 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
/* Tears down an open rrd_file.  The visible lines call msync() with MS_ASYNC
 * and munmap() on the mapping at file_start (length file_len), then close()
 * on the descriptor; an rrd_set_error() call follows each step, presumably
 * guarded by a check of ret that is not visible in this listing.
 * NOTE(review): the signature, the guards, any freeing of the handle and the
 * return statement are not visible here. */
512 rrd_file_t *rrd_file)
514 rrd_simple_file_t *rrd_simple_file;
515 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
519 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
521 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
522 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
524 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
526 ret = close(rrd_simple_file->fd);
528 rrd_set_error("closing file: %s", rrd_strerror(errno));
536 /* Set position of rrd_file. */
/* NOTE(review): the signature, the SEEK_SET assignment and any preprocessor
 * conditional separating the pos bookkeeping from the lseek() call are not
 * visible in this listing. */
539 rrd_file_t *rrd_file,
544 rrd_simple_file_t *rrd_simple_file;
545 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
/* Update the cached position: SEEK_CUR adds off to pos, SEEK_END sets pos to
 * file_len + off. */
548 if (whence == SEEK_SET)
550 else if (whence == SEEK_CUR)
551 rrd_file->pos += off;
552 else if (whence == SEEK_END)
553 rrd_file->pos = rrd_file->file_len + off;
555 ret = lseek(rrd_simple_file->fd, off, whence);
557 rrd_set_error("lseek: %s", rrd_strerror(errno));
560 /* mimic fseek, which returns 0 upon success */
/* The expression yields 0 when ret is non-negative and 1 otherwise. */
561 return ret < 0; /*XXX: or just ret to mimic lseek */
565 /* Get current position in rrd_file. */
/* Returns the pos field of rrd_file, which the seek, read and write code in
 * this file keeps up to date. */
568 rrd_file_t *rrd_file)
570 return rrd_file->pos;
574 /* Read count bytes into buffer buf, starting at rrd_file->pos.
575 * Returns the number of bytes read or <0 on error. */
/* NOTE(review): the signature, several declarations, the statements guarded
 * by the conditions below and the preprocessor lines that presumably select
 * between the memcpy() and read() variants are not visible in this
 * listing. */
578 rrd_file_t *rrd_file,
582 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
587 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
590 return -1; /* EINVAL */
/* _surplus is how far the request would run past file_len; a positive value
 * means the read has to be shortened. */
591 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
592 if (_surplus > 0) { /* short read */
/* Copy straight out of the mapping at the current position. */
597 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
599 rrd_file->pos += _cnt; /* mimic read() semantics */
/* Variant that reads from the descriptor instead of the mapping. */
604 ret = read(rrd_simple_file->fd, buf, count);
606 rrd_file->pos += ret; /* mimic read() semantics */
612 /* Write count bytes from buffer buf to the current position
613 * rrd_file->pos of rrd_simple_file->fd.
614 * Returns the number of bytes written or <0 on error. */
/* NOTE(review): the signature, the conditions guarding the early returns and
 * the preprocessor lines that presumably select between the memcpy() and
 * write() variants are not visible in this listing. */
617 rrd_file_t *rrd_file,
621 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
/* NOTE(review): file_len is assigned from st_size and from an off_t
 * elsewhere in this file; holding it in an int may truncate for large
 * files -- confirm the declared type of file_len. */
623 int old_size = rrd_file->file_len;
627 return -1; /* EINVAL */
/* The mapped variant never grows the file: a write that would end past the
 * current length is reported as an error. */
629 if((rrd_file->pos + count) > old_size)
631 rrd_set_error("attempting to write beyond end of file");
634 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
635 rrd_file->pos += count;
636 return count; /* mimic write() semantics */
/* Variant that writes through the descriptor and advances pos by the number
 * of bytes reported by write(). */
638 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
641 rrd_file->pos += _sz;
647 /* flush all data pending to be written to FD. */
/* Calls fdatasync() on the descriptor of rrd_file and, when it returns
 * non-zero, records the descriptor number and the errno text through
 * rrd_set_error().
 * NOTE(review): the signature and closing lines are not visible in this
 * listing. */
650 rrd_file_t *rrd_file)
652 rrd_simple_file_t *rrd_simple_file;
653 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
654 if (fdatasync(rrd_simple_file->fd) != 0) {
655 rrd_set_error("flushing fd %d: %s", rrd_simple_file->fd,
656 rrd_strerror(errno));
661 /* Initialize RRD header. */
/* Sets the pointer members of rrd shown below to NULL; nothing is allocated
 * in the visible code.
 * NOTE(review): the signature and some assignments are not visible in this
 * listing. */
666 rrd->stat_head = NULL;
669 rrd->live_head = NULL;
670 rrd->legacy_last_up = NULL;
672 rrd->pdp_prep = NULL;
673 rrd->cdp_prep = NULL;
674 rrd->rrd_value = NULL;
678 /* free RRD header data. */
/* NOTE(review): the signature, the preprocessor lines and several free()
 * calls are not visible in this listing, so which of the statements below
 * belong to the same build variant cannot be told from here. */
/* When legacy_last_up is set, live_head is the block that the open routine
 * obtained from malloc() for old-format files, so it is released here. */
684 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
685 free(rrd->live_head);
692 free(rrd->live_head);
693 free(rrd->stat_head);
699 free(rrd->rrd_value);
704 /* routine used by external libraries to free memory allocated by