1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
24 #define __rrd_read(dst, dst_t, cnt) { \
25 size_t wanted = sizeof(dst_t)*(cnt); \
26 if (offset + wanted > rrd_file->file_len) { \
27 rrd_set_error("reached EOF while loading header " #dst); \
28 goto out_nullify_head; \
30 (dst) = (dst_t*)(void*) (data + offset); \
34 #define __rrd_read(dst, dst_t, cnt) { \
35 size_t wanted = sizeof(dst_t)*(cnt); \
37 if ((dst = malloc(wanted)) == NULL) { \
38 rrd_set_error(#dst " malloc"); \
39 goto out_nullify_head; \
41 got = read (rrd_file->fd, dst, wanted); \
42 if (got != wanted) { \
43 rrd_set_error("short read while reading header " #dst); \
44 goto out_nullify_head; \
50 /* get the address of the start of this page */
51 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
53 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
57 /* Open a database file, return its header and an open filehandle,
58 * positioned to the first cdp in the first rra.
59 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
60 * before returning an error. Do not call rrd_close upon failure of rrd_open.
64 const char *const file_name,
69 mode_t mode = S_IRUSR;
73 ssize_t _page_size = sysconf(_SC_PAGESIZE);
74 int mm_prot = PROT_READ, mm_flags = 0;
75 char *data = MAP_FAILED;
79 rrd_file_t *rrd_file = NULL;
80 off_t newfile_size = 0;
82 if (rdwr & RRD_CREAT) {
83 /* yes bad inline signaling alert, we are using the
84 floatcookie to pass the size in ... only used in resize */
85 newfile_size = (off_t) rrd->stat_head->float_cookie;
89 rrd_file = malloc(sizeof(rrd_file_t));
90 if (rrd_file == NULL) {
91 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
94 memset(rrd_file, 0, sizeof(rrd_file_t));
97 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
98 (RRD_READONLY | RRD_READWRITE)) {
99 /* Both READONLY and READWRITE were given, which is invalid. */
100 rrd_set_error("in read/write request mask");
104 if (rdwr & RRD_READONLY) {
107 mm_flags = MAP_PRIVATE;
108 # ifdef MAP_NORESERVE
109 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
113 if (rdwr & RRD_READWRITE) {
117 mm_flags = MAP_SHARED;
118 mm_prot |= PROT_WRITE;
121 if (rdwr & RRD_CREAT) {
122 flags |= (O_CREAT | O_TRUNC);
125 if (rdwr & RRD_READAHEAD) {
127 mm_flags |= MAP_POPULATE; /* populate ptes and data */
129 #if defined MAP_NONBLOCK
130 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
133 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
137 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
138 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
142 /* Better try to avoid seeks as much as possible. stat may be heavy but
143 * many concurrent seeks are even worse. */
144 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
145 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
148 if (newfile_size == 0) {
149 rrd_file->file_len = statb.st_size;
151 rrd_file->file_len = newfile_size;
152 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
153 write(rrd_file->fd, "\0", 1); /* poke */
154 lseek(rrd_file->fd, 0, SEEK_SET);
156 #ifdef HAVE_POSIX_FADVISE
157 /* In general we need no read-ahead when dealing with rrd_files.
158 When we stop reading, it is highly unlikely that we start up again.
159 In this manner we actually save time and diskaccess (and buffer cache).
160 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
161 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
165 if (rdwr & RRD_READWRITE)
167 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
168 rrd_set_error("failed to disable the stream buffer\n");
174 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
175 rrd_file->fd, offset);
177 /* lets see if the first read worked */
178 if (data == MAP_FAILED) {
179 rrd_set_error("mmaping file '%s': %s", file_name,
180 rrd_strerror(errno));
183 rrd_file->file_start = data;
184 if (rdwr & RRD_CREAT) {
185 memset(data, DNAN, newfile_size - 1);
189 if (rdwr & RRD_CREAT)
192 if (rdwr & RRD_COPY) {
193 /* We will read everything in a moment (copying) */
194 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
196 /* We do not need to read anything in for the moment */
197 madvise(data, rrd_file->file_len, MADV_RANDOM);
198 /* the stat_head will be needed soonish, so hint accordingly */
199 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
203 __rrd_read(rrd->stat_head, stat_head_t,
206 /* let's do some tests to see if we are on track ... */
207 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
208 rrd_set_error("'%s' is not an RRD file", file_name);
209 goto out_nullify_head;
212 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
213 rrd_set_error("This RRD was created on another architecture");
214 goto out_nullify_head;
217 version = atoi(rrd->stat_head->version);
219 if (version > atoi(RRD_VERSION)) {
220 rrd_set_error("can't handle RRD file version %s",
221 rrd->stat_head->version);
222 goto out_nullify_head;
224 #if defined USE_MADVISE
225 /* the ds_def will be needed soonish, so hint accordingly */
226 madvise(data + PAGE_START(offset),
227 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
229 __rrd_read(rrd->ds_def, ds_def_t,
230 rrd->stat_head->ds_cnt);
232 #if defined USE_MADVISE
233 /* the rra_def will be needed soonish, so hint accordingly */
234 madvise(data + PAGE_START(offset),
235 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
237 __rrd_read(rrd->rra_def, rra_def_t,
238 rrd->stat_head->rra_cnt);
240 /* handle different format for the live_head */
242 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
243 if (rrd->live_head == NULL) {
244 rrd_set_error("live_head_t malloc");
247 #if defined USE_MADVISE
248 /* the live_head will be needed soonish, so hint accordingly */
249 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
251 __rrd_read(rrd->legacy_last_up, time_t,
254 rrd->live_head->last_up = *rrd->legacy_last_up;
255 rrd->live_head->last_up_usec = 0;
257 #if defined USE_MADVISE
258 /* the live_head will be needed soonish, so hint accordingly */
259 madvise(data + PAGE_START(offset),
260 sizeof(live_head_t), MADV_WILLNEED);
262 __rrd_read(rrd->live_head, live_head_t,
265 __rrd_read(rrd->pdp_prep, pdp_prep_t,
266 rrd->stat_head->ds_cnt);
267 __rrd_read(rrd->cdp_prep, cdp_prep_t,
268 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
269 __rrd_read(rrd->rra_ptr, rra_ptr_t,
270 rrd->stat_head->rra_cnt);
272 rrd_file->header_len = offset;
273 rrd_file->pos = offset;
276 unsigned long row_cnt = 0;
279 for (i=0; i<rrd->stat_head->rra_cnt; i++)
280 row_cnt += rrd->rra_def[i].row_cnt;
282 off_t correct_len = rrd_file->header_len +
283 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
285 if (correct_len > rrd_file->file_len)
287 rrd_set_error("'%s' is too small (should be %ld bytes)",
288 file_name, (long long) correct_len);
289 goto out_nullify_head;
296 rrd->stat_head = NULL;
299 if (data != MAP_FAILED)
300 munmap(data, rrd_file->file_len);
309 #if defined DEBUG && DEBUG > 1
310 /* Print list of in-core pages of the current rrd_file. */
313 rrd_file_t *rrd_file,
317 /* pretty print blocks in core */
320 ssize_t _page_size = sysconf(_SC_PAGESIZE);
322 off = rrd_file->file_len +
323 ((rrd_file->file_len + _page_size - 1) / _page_size);
327 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
329 unsigned is_in = 0, was_in = 0;
331 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
332 is_in = vec[off] & 1; /* if lsb set then is core resident */
335 if (was_in != is_in) {
336 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
337 was_in ? "" : "not ", vec + prev, off - prev);
343 "%s: %sin core: %p len %ld\n", mark,
344 was_in ? "" : "not ", vec + prev, off - prev);
346 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
349 fprintf(stderr, "sorry mincore only works with mmap");
352 #endif /* defined DEBUG && DEBUG > 1 */
355 /* drop cache except for the header and the active pages */
357 rrd_file_t *rrd_file,
360 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
361 off_t dontneed_start;
365 ssize_t _page_size = sysconf(_SC_PAGESIZE);
367 #if defined DEBUG && DEBUG > 1
368 mincore_print(rrd_file, "before");
371 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
372 rra_start = rrd_file->header_len;
373 dontneed_start = PAGE_START(rra_start) + _page_size;
374 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
377 + rrd->rra_ptr[i].cur_row
378 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
379 if (active_block > dontneed_start) {
381 madvise(rrd_file->file_start + dontneed_start,
382 active_block - dontneed_start - 1, MADV_DONTNEED);
384 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
385 #ifdef HAVE_POSIX_FADVISE
386 posix_fadvise(rrd_file->fd, dontneed_start,
387 active_block - dontneed_start - 1,
388 POSIX_FADV_DONTNEED);
391 dontneed_start = active_block;
392 /* do not release 'hot' block if update for this RRA will occur
393 * within 10 minutes */
394 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
395 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
396 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
397 dontneed_start += _page_size;
400 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
404 if (dontneed_start < rrd_file->file_len) {
406 madvise(rrd_file->file_start + dontneed_start,
407 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
409 #ifdef HAVE_POSIX_FADVISE
410 posix_fadvise(rrd_file->fd, dontneed_start,
411 rrd_file->file_len - dontneed_start,
412 POSIX_FADV_DONTNEED);
416 #if defined DEBUG && DEBUG > 1
417 mincore_print(rrd_file, "after");
419 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
427 rrd_file_t *rrd_file)
432 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
434 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
435 ret = munmap(rrd_file->file_start, rrd_file->file_len);
437 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
439 ret = close(rrd_file->fd);
441 rrd_set_error("closing file: %s", rrd_strerror(errno));
448 /* Set position of rrd_file. */
451 rrd_file_t *rrd_file,
458 if (whence == SEEK_SET)
460 else if (whence == SEEK_CUR)
461 rrd_file->pos += off;
462 else if (whence == SEEK_END)
463 rrd_file->pos = rrd_file->file_len + off;
465 ret = lseek(rrd_file->fd, off, whence);
467 rrd_set_error("lseek: %s", rrd_strerror(errno));
470 /* mimic fseek, which returns 0 upon success */
471 return ret < 0; /*XXX: or just ret to mimic lseek */
475 /* Get current position in rrd_file. */
478 rrd_file_t *rrd_file)
480 return rrd_file->pos;
484 /* Read count bytes into buffer buf, starting at rrd_file->pos.
485 * Returns the number of bytes read or <0 on error. */
488 rrd_file_t *rrd_file,
496 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
499 return -1; /* EINVAL */
500 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
501 if (_surplus > 0) { /* short read */
506 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
508 rrd_file->pos += _cnt; /* mimic read() semantics */
513 ret = read(rrd_file->fd, buf, count);
515 rrd_file->pos += ret; /* mimic read() semantics */
521 /* Write count bytes from buffer buf to the current position
522 * rrd_file->pos of rrd_file->fd.
523 * Returns the number of bytes written or <0 on error. */
526 rrd_file_t *rrd_file,
534 return -1; /* EINVAL */
535 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
536 rrd_file->pos += count;
537 return count; /* mimic write() semantics */
539 ssize_t _sz = write(rrd_file->fd, buf, count);
542 rrd_file->pos += _sz;
548 /* flush all data pending to be written to FD. */
551 rrd_file_t *rrd_file)
553 if (fdatasync(rrd_file->fd) != 0) {
554 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
555 rrd_strerror(errno));
560 /* Initialize RRD header. */
565 rrd->stat_head = NULL;
568 rrd->live_head = NULL;
569 rrd->legacy_last_up = NULL;
571 rrd->pdp_prep = NULL;
572 rrd->cdp_prep = NULL;
573 rrd->rrd_value = NULL;
577 /* free RRD header data. */
583 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
584 free(rrd->live_head);
591 free(rrd->live_head);
592 free(rrd->stat_head);
598 free(rrd->rrd_value);
603 /* routine used by external libraries to free memory allocated by