1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
24 #define __rrd_read(dst, dst_t, cnt) { \
25 size_t wanted = sizeof(dst_t)*(cnt); \
26 if (offset + wanted > rrd_file->file_len) { \
27 rrd_set_error("reached EOF while loading header " #dst); \
28 goto out_nullify_head; \
30 (dst) = (dst_t*)(void*) (data + offset); \
34 #define __rrd_read(dst, dst_t, cnt) { \
35 size_t wanted = sizeof(dst_t)*(cnt); \
37 if ((dst = malloc(wanted)) == NULL) { \
38 rrd_set_error(#dst " malloc"); \
39 goto out_nullify_head; \
41 got = read (rrd_file->fd, dst, wanted); \
42 if (got != wanted) { \
43 rrd_set_error("short read while reading header " #dst); \
44 goto out_nullify_head; \
50 /* get the address of the start of this page */
51 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
53 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
57 /* Open a database file, return its header and an open filehandle,
58 * positioned to the first cdp in the first rra.
59 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
60 * before returning an error. Do not call rrd_close upon failure of rrd_open.
61 * If creating a new file, the parameter rrd must be initialised with
62 * details of the file content.
63 * If opening an existing file, then rrd must be initialised by
64 * rrd_init(rrd) prior to invoking rrd_open
68 const char *const file_name,
74 mode_t mode = S_IRUSR;
78 ssize_t _page_size = sysconf(_SC_PAGESIZE);
79 char *data = MAP_FAILED;
83 rrd_file_t *rrd_file = NULL;
84 off_t newfile_size = 0;
85 off_t header_len, value_cnt, data_len;
87 /* Are we creating a new file? */
88 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
91 sizeof(stat_head_t) + \
92 sizeof(ds_def_t) * rrd->stat_head->ds_cnt + \
93 sizeof(rra_def_t) * rrd->stat_head->rra_cnt + \
95 sizeof(live_head_t) + \
96 sizeof(pdp_prep_t) * rrd->stat_head->ds_cnt + \
97 sizeof(cdp_prep_t) * rrd->stat_head->ds_cnt * rrd->stat_head->rra_cnt + \
98 sizeof(rra_ptr_t) * rrd->stat_head->rra_cnt;
/* every row of every RRA holds one value per data source */
101 for (i = 0; i < rrd->stat_head->rra_cnt; i++)
102 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[i].row_cnt;
104 data_len = sizeof(rrd_value_t) * value_cnt;
/* a non-zero newfile_size is what later marks the "creating" case */
106 newfile_size = header_len + data_len;
109 rrd_file = malloc(sizeof(rrd_file_t));
110 if (rrd_file == NULL) {
111 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
114 memset(rrd_file, 0, sizeof(rrd_file_t));
117 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
118 (RRD_READONLY | RRD_READWRITE)) {
119 /* Both READONLY and READWRITE were given, which is invalid. */
120 rrd_set_error("in read/write request mask");
/* default mapping protection is read-only; PROT_WRITE is added below
 * when RRD_READWRITE is requested */
126 rrd_file->mm_prot = PROT_READ;
127 rrd_file->mm_flags = 0;
130 if (rdwr & RRD_READONLY) {
133 rrd_file->mm_flags = MAP_PRIVATE;
134 # ifdef MAP_NORESERVE
135 rrd_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
139 if (rdwr & RRD_READWRITE) {
143 rrd_file->mm_flags = MAP_SHARED;
144 rrd_file->mm_prot |= PROT_WRITE;
147 if (rdwr & RRD_CREAT) {
/* O_TRUNC: an already existing file is emptied when creating */
148 flags |= (O_CREAT | O_TRUNC);
151 if (rdwr & RRD_READAHEAD) {
153 rrd_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
155 #if defined MAP_NONBLOCK
156 rrd_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
159 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
163 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
164 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
168 /* Better try to avoid seeks as much as possible. stat may be heavy but
169 * many concurrent seeks are even worse. */
170 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
171 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
/* existing file: length comes from fstat(); new file: the length is the
 * computed newfile_size and the file is extended by writing one byte at
 * its last offset */
174 if (newfile_size == 0) {
175 rrd_file->file_len = statb.st_size;
177 rrd_file->file_len = newfile_size;
/* NOTE(review): the results of the lseek()/write() calls below are not
 * checked, so a failure to extend the file goes unnoticed here */
178 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
179 write(rrd_file->fd, "\0", 1); /* poke */
180 lseek(rrd_file->fd, 0, SEEK_SET);
182 #ifdef HAVE_POSIX_FADVISE
183 /* In general we need no read-ahead when dealing with rrd_files.
184 When we stop reading, it is highly unlikely that we start up again.
185 In this manner we actually save time and diskaccess (and buffer cache).
186 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
187 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
191 if (rdwr & RRD_READWRITE)
193 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
194 rrd_set_error("failed to disable the stream buffer\n");
/* map the whole file using the protection and flags chosen above */
201 data = mmap(0, rrd_file->file_len, rrd_file->mm_prot, rrd_file->mm_flags,
202 rrd_file->fd, offset);
204 /* let's see if the mmap worked */
205 if (data == MAP_FAILED) {
206 rrd_set_error("mmaping file '%s': %s", file_name,
207 rrd_strerror(errno));
210 rrd_file->file_start = data;
211 if (rdwr & RRD_CREAT) {
/* NOTE(review): DNAN is handed to memset() as its int fill value;
 * presumably DNAN is a floating-point NaN, in which case this does not
 * store NaN values in the file - confirm what fill is intended */
212 memset(data, DNAN, newfile_size - 1);
216 if (rdwr & RRD_CREAT)
219 if (rdwr & RRD_COPY) {
220 /* We will read everything in a moment (copying) */
221 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
223 /* We do not need to read anything in for the moment */
224 madvise(data, rrd_file->file_len, MADV_RANDOM);
225 /* the stat_head will be needed soonish, so hint accordingly */
226 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
230 __rrd_read(rrd->stat_head, stat_head_t,
233 /* let's do some tests to check that we are on track ... */
234 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
235 rrd_set_error("'%s' is not an RRD file", file_name);
236 goto out_nullify_head;
/* float_cookie mismatch is reported as a file from another architecture */
239 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
240 rrd_set_error("This RRD was created on another architecture");
241 goto out_nullify_head;
244 version = atoi(rrd->stat_head->version);
/* files newer than this library's RRD_VERSION are rejected */
246 if (version > atoi(RRD_VERSION)) {
247 rrd_set_error("can't handle RRD file version %s",
248 rrd->stat_head->version);
249 goto out_nullify_head;
251 #if defined USE_MADVISE
252 /* the ds_def will be needed soonish, so hint accordingly */
253 madvise(data + PAGE_START(offset),
254 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
256 __rrd_read(rrd->ds_def, ds_def_t,
257 rrd->stat_head->ds_cnt);
259 #if defined USE_MADVISE
260 /* the rra_def will be needed soonish, so hint accordingly */
261 madvise(data + PAGE_START(offset),
262 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
264 __rrd_read(rrd->rra_def, rra_def_t,
265 rrd->stat_head->rra_cnt);
267 /* handle different format for the live_head */
/* legacy layout: only a time_t is read from the file, so live_head is
 * allocated with malloc() and filled from it (usec part set to 0) */
269 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
270 if (rrd->live_head == NULL) {
271 rrd_set_error("live_head_t malloc");
274 #if defined USE_MADVISE
275 /* the live_head will be needed soonish, so hint accordingly */
276 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
278 __rrd_read(rrd->legacy_last_up, time_t,
281 rrd->live_head->last_up = *rrd->legacy_last_up;
282 rrd->live_head->last_up_usec = 0;
284 #if defined USE_MADVISE
285 /* the live_head will be needed soonish, so hint accordingly */
286 madvise(data + PAGE_START(offset),
287 sizeof(live_head_t), MADV_WILLNEED);
289 __rrd_read(rrd->live_head, live_head_t,
292 __rrd_read(rrd->pdp_prep, pdp_prep_t,
293 rrd->stat_head->ds_cnt);
294 __rrd_read(rrd->cdp_prep, cdp_prep_t,
295 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
296 __rrd_read(rrd->rra_ptr, rra_ptr_t,
297 rrd->stat_head->rra_cnt);
/* offset now marks the end of the header; record it as both the header
 * length and the current position */
299 rrd_file->header_len = offset;
300 rrd_file->pos = offset;
/* sanity check: the header plus all rows of all RRAs must fit in the file */
303 unsigned long row_cnt = 0;
306 for (i=0; i<rrd->stat_head->rra_cnt; i++)
307 row_cnt += rrd->rra_def[i].row_cnt;
309 off_t correct_len = rrd_file->header_len +
310 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
/* NOTE(review): the message below uses "%ld" while the argument is cast to
 * (long long); "%lld" would match the cast */
312 if (correct_len > rrd_file->file_len)
314 rrd_set_error("'%s' is too small (should be %ld bytes)",
315 file_name, (long long) correct_len);
316 goto out_nullify_head;
323 rrd->stat_head = NULL;
/* only unmap if the mapping was actually established */
326 if (data != MAP_FAILED)
327 munmap(data, rrd_file->file_len);
336 #if defined DEBUG && DEBUG > 1
337 /* Print the list of in-core pages of the current rrd_file. */
/* Debug-only helper: asks mincore() about the mapping that starts at
 * rrd_file->file_start and prints each run of resident / non-resident
 * entries to stderr, prefixed with the caller-supplied mark string. */
340 rrd_file_t *rrd_file,
344 /* pretty print blocks in core */
347 ssize_t _page_size = sysconf(_SC_PAGESIZE);
/* file length in bytes plus the number of pages needed to cover it */
349 off = rrd_file->file_len +
350 ((rrd_file->file_len + _page_size - 1) / _page_size);
354 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
356 unsigned is_in = 0, was_in = 0;
/* NOTE(review): mincore(2) fills one vector byte per page, yet this loop
 * indexes vec by byte offset up to file_len - confirm this is intended */
358 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
359 is_in = vec[off] & 1; /* if lsb set then is core resident */
/* a change of residency ends the current run: report it */
362 if (was_in != is_in) {
/* NOTE(review): the %p value is vec + prev, i.e. an address inside the
 * result vector rather than inside the mapped file */
363 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
364 was_in ? "" : "not ", vec + prev, off - prev);
370 "%s: %sin core: %p len %ld\n", mark,
371 was_in ? "" : "not ", vec + prev, off - prev);
373 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
376 fprintf(stderr, "sorry mincore only works with mmap");
379 #endif /* defined DEBUG && DEBUG > 1 */
382 * get exclusive lock to whole file.
383 * lock gets removed when we close the file
385 * returns 0 on success
/* Two ways of taking the lock are visible below: _locking() over the current
 * file size on native Windows, and an fcntl() write lock otherwise
 * (presumably the non-Windows branch - the #else line is not shown). */
393 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
396 if (_fstat(file->fd, &st) == 0) {
/* _LK_NBLCK: lock st.st_size bytes, failing instead of waiting */
397 rcstat = _locking(file->fd, _LK_NBLCK, st.st_size);
404 lock.l_type = F_WRLCK; /* exclusive write lock */
405 lock.l_len = 0; /* whole file */
406 lock.l_start = 0; /* start of file */
407 lock.l_whence = SEEK_SET; /* l_start is measured from the start of the file */
/* F_SETLK does not wait: it fails at once if a conflicting lock is held */
409 rcstat = fcntl(file->fd, F_SETLK, &lock);
417 /* drop cache except for the header and the active pages */
419 rrd_file_t *rrd_file,
422 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
423 off_t dontneed_start;
427 ssize_t _page_size = sysconf(_SC_PAGESIZE);
/* nothing to do without a file handle */
429 if (rrd_file == NULL) {
430 #if defined DEBUG && DEBUG
431 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
436 #if defined DEBUG && DEBUG > 1
437 mincore_print(rrd_file, "before");
440 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
441 rra_start = rrd_file->header_len;
/* the first releasable byte is one page past the page that holds the end
 * of the header */
442 dontneed_start = PAGE_START(rra_start) + _page_size;
443 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
446 + rrd->rra_ptr[i].cur_row
447 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
/* release the range between the previous keep-point and the block that
 * holds this RRA's current row */
448 if (active_block > dontneed_start) {
450 madvise(rrd_file->file_start + dontneed_start,
451 active_block - dontneed_start - 1, MADV_DONTNEED);
453 /* on Linux at least, only fadvise DONTNEED seems to purge pages from cache */
454 #ifdef HAVE_POSIX_FADVISE
455 posix_fadvise(rrd_file->fd, dontneed_start,
456 active_block - dontneed_start - 1,
457 POSIX_FADV_DONTNEED);
460 dontneed_start = active_block;
461 /* do not release 'hot' block if update for this RRA will occur
462 * within 10 minutes */
463 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
464 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
465 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
/* keep the active page: start the next release one page further on */
466 dontneed_start += _page_size;
469 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
/* release whatever remains between the last keep-point and end of file */
473 if (dontneed_start < rrd_file->file_len) {
475 madvise(rrd_file->file_start + dontneed_start,
476 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
478 #ifdef HAVE_POSIX_FADVISE
479 posix_fadvise(rrd_file->fd, dontneed_start,
480 rrd_file->file_len - dontneed_start,
481 POSIX_FADV_DONTNEED);
485 #if defined DEBUG && DEBUG > 1
486 mincore_print(rrd_file, "after");
488 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
496 rrd_file_t *rrd_file)
/* Tear-down sequence visible here: msync() the mapping, munmap() it, then
 * close() the descriptor; each failure is recorded with rrd_set_error(). */
/* MS_ASYNC: schedule the write-back of the mapping without waiting for it */
501 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
503 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
/* drop the mapping of the whole file */
504 ret = munmap(rrd_file->file_start, rrd_file->file_len);
506 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
/* finally release the file descriptor */
508 ret = close(rrd_file->fd);
510 rrd_set_error("closing file: %s", rrd_strerror(errno));
517 /* Set the position of rrd_file according to off and whence
 * (SEEK_SET, SEEK_CUR or SEEK_END). Returns 0 on success, non-zero on failure. */
520 rrd_file_t *rrd_file,
/* keep the logical position rrd_file->pos in step with the request */
527 if (whence == SEEK_SET)
529 else if (whence == SEEK_CUR)
530 rrd_file->pos += off;
531 else if (whence == SEEK_END)
532 rrd_file->pos = rrd_file->file_len + off;
/* reposition the underlying descriptor (presumably the build without
 * mmap - the preprocessor lines are not shown) */
534 ret = lseek(rrd_file->fd, off, whence);
536 rrd_set_error("lseek: %s", rrd_strerror(errno));
539 /* mimic fseek, which returns 0 upon success */
540 return ret < 0; /*XXX: or just ret to mimic lseek */
544 /* Get current position in rrd_file, i.e. the value tracked in rrd_file->pos. */
547 rrd_file_t *rrd_file)
549 return rrd_file->pos;
553 /* Read count bytes into buffer buf, starting at rrd_file->pos.
554 * Returns the number of bytes read or <0 on error. */
557 rrd_file_t *rrd_file,
565 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
568 return -1; /* EINVAL */
/* number of requested bytes that would lie beyond the end of the file */
569 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
570 if (_surplus > 0) { /* short read */
/* copy straight out of the mapping at the current position */
575 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
577 rrd_file->pos += _cnt; /* mimic read() semantics */
/* read() from the descriptor (presumably the build without mmap - the
 * preprocessor lines are not shown) */
582 ret = read(rrd_file->fd, buf, count);
584 rrd_file->pos += ret; /* mimic read() semantics */
590 /* Write count bytes from buffer buf to the current position
591 * rrd_file->pos of rrd_file->fd.
592 * Returns the number of bytes written or <0 on error. */
595 rrd_file_t *rrd_file,
/* NOTE(review): file_len is copied into an int; rrd_open stores off_t
 * values (st_size / newfile_size) in file_len, so this may truncate for
 * very large files - confirm the declared type of file_len */
600 int old_size = rrd_file->file_len;
604 return -1; /* EINVAL */
/* the mapping cannot grow: refuse writes that would run past the file end */
606 if((rrd_file->pos + count) > old_size)
608 rrd_set_error("attempting to write beyond end of file");
/* copy into the mapping at the current position */
611 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
612 rrd_file->pos += count;
613 return count; /* mimic write() semantics */
/* write() to the descriptor (presumably the build without mmap - the
 * preprocessor lines are not shown) */
615 ssize_t _sz = write(rrd_file->fd, buf, count);
618 rrd_file->pos += _sz;
624 /* Flush all data pending to be written to FD, using fdatasync() on
 * rrd_file->fd; a failure is recorded with rrd_set_error(). */
627 rrd_file_t *rrd_file)
629 if (fdatasync(rrd_file->fd) != 0) {
630 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
631 rrd_strerror(errno));
636 /* Initialize RRD header: the header pointers set below are all reset to NULL. */
641 rrd->stat_head = NULL;
644 rrd->live_head = NULL;
645 rrd->legacy_last_up = NULL;
647 rrd->pdp_prep = NULL;
648 rrd->cdp_prep = NULL;
649 rrd->rrd_value = NULL;
653 /* Free RRD header data. */
/* for legacy files rrd_open allocates live_head with malloc(), so it is
 * released here when legacy_last_up is set */
659 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
660 free(rrd->live_head);
/* NOTE(review): the frees below presumably belong to the build in which the
 * header parts are malloc'ed copies rather than pointers into the mapping -
 * the preprocessor lines are not shown */
667 free(rrd->live_head);
668 free(rrd->stat_head);
674 free(rrd->rrd_value);
679 /* routine used by external libraries to free memory allocated by