1 /*****************************************************************************
2 * RRDtool 1.3rc9 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
24 #define __rrd_read(dst, dst_t, cnt) \
25 (dst) = (dst_t*)(void*) (data + offset); \
26 offset += sizeof(dst_t) * (cnt)
28 #define __rrd_read(dst, dst_t, cnt) \
29 if ((dst = malloc(sizeof(dst_t)*(cnt))) == NULL) { \
30 rrd_set_error(#dst " malloc"); \
31 goto out_nullify_head; \
33 offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
36 /* get the address of the start of this page */
37 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
39 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
43 /* Open a database file, return its header and an open filehandle,
44 * positioned to the first cdp in the first rra.
45 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
46 * before returning an error. Do not call rrd_close upon failure of rrd_open.
50 const char *const file_name,
55 mode_t mode = S_IRUSR;
59 ssize_t _page_size = sysconf(_SC_PAGESIZE);
60 int mm_prot = PROT_READ, mm_flags = 0;
65 rrd_file_t *rrd_file = NULL;
66 off_t newfile_size = 0;
68 if (rdwr & RRD_CREAT) {
69 /* yes bad inline signaling alert, we are using the
70 floatcookie to pass the size in ... only used in resize */
71 newfile_size = (off_t) rrd->stat_head->float_cookie;
75 rrd_file = malloc(sizeof(rrd_file_t));
76 if (rrd_file == NULL) {
77 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
80 memset(rrd_file, 0, sizeof(rrd_file_t));
83 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
84 (RRD_READONLY | RRD_READWRITE)) {
85 /* Both READONLY and READWRITE were given, which is invalid. */
86 rrd_set_error("in read/write request mask");
90 if (rdwr & RRD_READONLY) {
93 mm_flags = MAP_PRIVATE;
95 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
99 if (rdwr & RRD_READWRITE) {
103 mm_flags = MAP_SHARED;
104 mm_prot |= PROT_WRITE;
107 if (rdwr & RRD_CREAT) {
108 flags |= (O_CREAT | O_TRUNC);
111 if (rdwr & RRD_READAHEAD) {
113 mm_flags |= MAP_POPULATE; /* populate ptes and data */
115 #if defined MAP_NONBLOCK
116 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
120 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
121 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
125 /* Better try to avoid seeks as much as possible. stat may be heavy but
126 * many concurrent seeks are even worse. */
127 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
128 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
131 if (newfile_size == 0) {
132 rrd_file->file_len = statb.st_size;
134 rrd_file->file_len = newfile_size;
135 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
136 write(rrd_file->fd, "\0", 1); /* poke */
137 lseek(rrd_file->fd, 0, SEEK_SET);
139 #ifdef HAVE_POSIX_FADVISE
140 /* In general we need no read-ahead when dealing with rrd_files.
141 When we stop reading, it is highly unlikely that we start up again.
142 In this manner we actually save time and disk access (and buffer cache).
143 Thanks to Dave Plonka for the idea of using POSIX_FADV_RANDOM here. */
144 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
148 if (rdwr & RRD_READWRITE)
150 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
151 rrd_set_error("failed to disable the stream buffer\n");
157 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
158 rrd_file->fd, offset);
160 /* lets see if the first read worked */
161 if (data == MAP_FAILED) {
162 rrd_set_error("mmaping file '%s': %s", file_name,
163 rrd_strerror(errno));
166 rrd_file->file_start = data;
167 if (rdwr & RRD_CREAT) {
168 memset(data, DNAN, newfile_size - 1);
172 if (rdwr & RRD_CREAT)
175 if (rdwr & RRD_COPY) {
176 /* We will read everything in a moment (copying) */
177 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
179 /* We do not need to read anything in for the moment */
180 madvise(data, rrd_file->file_len, MADV_RANDOM);
181 /* the stat_head will be needed soonish, so hint accordingly */
182 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
186 __rrd_read(rrd->stat_head, stat_head_t,
189 /* let's do some tests to see if we are on track ... */
190 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
191 rrd_set_error("'%s' is not an RRD file", file_name);
192 goto out_nullify_head;
195 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
196 rrd_set_error("This RRD was created on another architecture");
197 goto out_nullify_head;
200 version = atoi(rrd->stat_head->version);
202 if (version > atoi(RRD_VERSION)) {
203 rrd_set_error("can't handle RRD file version %s",
204 rrd->stat_head->version);
205 goto out_nullify_head;
207 #if defined USE_MADVISE
208 /* the ds_def will be needed soonish, so hint accordingly */
209 madvise(data + PAGE_START(offset),
210 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
212 __rrd_read(rrd->ds_def, ds_def_t,
213 rrd->stat_head->ds_cnt);
215 #if defined USE_MADVISE
216 /* the rra_def will be needed soonish, so hint accordingly */
217 madvise(data + PAGE_START(offset),
218 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
220 __rrd_read(rrd->rra_def, rra_def_t,
221 rrd->stat_head->rra_cnt);
223 /* handle different format for the live_head */
225 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
226 if (rrd->live_head == NULL) {
227 rrd_set_error("live_head_t malloc");
230 #if defined USE_MADVISE
231 /* the live_head will be needed soonish, so hint accordingly */
232 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
234 __rrd_read(rrd->legacy_last_up, time_t,
237 rrd->live_head->last_up = *rrd->legacy_last_up;
238 rrd->live_head->last_up_usec = 0;
240 #if defined USE_MADVISE
241 /* the live_head will be needed soonish, so hint accordingly */
242 madvise(data + PAGE_START(offset),
243 sizeof(live_head_t), MADV_WILLNEED);
245 __rrd_read(rrd->live_head, live_head_t,
248 __rrd_read(rrd->pdp_prep, pdp_prep_t,
249 rrd->stat_head->ds_cnt);
250 __rrd_read(rrd->cdp_prep, cdp_prep_t,
251 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
252 __rrd_read(rrd->rra_ptr, rra_ptr_t,
253 rrd->stat_head->rra_cnt);
255 rrd_file->header_len = offset;
256 rrd_file->pos = offset;
260 rrd->stat_head = NULL;
269 #if defined DEBUG && DEBUG > 1
270 /* Print list of in-core pages of the current rrd_file. */
273 rrd_file_t *rrd_file,
277 /* pretty print blocks in core */
280 ssize_t _page_size = sysconf(_SC_PAGESIZE);
282 off = rrd_file->file_len +
283 ((rrd_file->file_len + _page_size - 1) / _page_size);
287 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
289 unsigned is_in = 0, was_in = 0;
291 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
292 is_in = vec[off] & 1; /* if lsb set then is core resident */
295 if (was_in != is_in) {
296 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
297 was_in ? "" : "not ", vec + prev, off - prev);
303 "%s: %sin core: %p len %ld\n", mark,
304 was_in ? "" : "not ", vec + prev, off - prev);
306 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
309 fprintf(stderr, "sorry mincore only works with mmap");
312 #endif /* defined DEBUG && DEBUG > 1 */
315 /* drop cache except for the header and the active pages */
317 rrd_file_t *rrd_file,
320 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
321 unsigned long dontneed_start;
322 unsigned long rra_start;
323 unsigned long active_block;
325 ssize_t _page_size = sysconf(_SC_PAGESIZE);
327 #if defined DEBUG && DEBUG > 1
328 mincore_print(rrd_file, "before");
331 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
332 rra_start = rrd_file->header_len;
333 dontneed_start = PAGE_START(rra_start) + _page_size;
334 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
337 + rrd->rra_ptr[i].cur_row
338 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
339 if (active_block > dontneed_start) {
341 madvise(rrd_file->file_start + dontneed_start,
342 active_block - dontneed_start - 1, MADV_DONTNEED);
344 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
345 #ifdef HAVE_POSIX_FADVISE
346 posix_fadvise(rrd_file->fd, dontneed_start,
347 active_block - dontneed_start - 1,
348 POSIX_FADV_DONTNEED);
351 dontneed_start = active_block;
352 /* do not release 'hot' block if update for this RRA will occur
353 * within 10 minutes */
354 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
355 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
356 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
357 dontneed_start += _page_size;
360 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
364 madvise(rrd_file->file_start + dontneed_start,
365 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
367 #ifdef HAVE_POSIX_FADVISE
368 posix_fadvise(rrd_file->fd, dontneed_start,
369 rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
371 #if defined DEBUG && DEBUG > 1
372 mincore_print(rrd_file, "after");
374 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
382 rrd_file_t *rrd_file)
387 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
389 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
390 ret = munmap(rrd_file->file_start, rrd_file->file_len);
392 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
394 ret = close(rrd_file->fd);
396 rrd_set_error("closing file: %s", rrd_strerror(errno));
403 /* Set position of rrd_file. */
406 rrd_file_t *rrd_file,
413 if (whence == SEEK_SET)
415 else if (whence == SEEK_CUR)
416 rrd_file->pos += off;
417 else if (whence == SEEK_END)
418 rrd_file->pos = rrd_file->file_len + off;
420 ret = lseek(rrd_file->fd, off, whence);
422 rrd_set_error("lseek: %s", rrd_strerror(errno));
425 /* mimic fseek, which returns 0 upon success */
426 return ret < 0; /*XXX: or just ret to mimic lseek */
430 /* Get current position in rrd_file. */
432 inline off_t rrd_tell(
433 rrd_file_t *rrd_file)
435 return rrd_file->pos;
439 /* Read count bytes into buffer buf, starting at rrd_file->pos.
440 * Returns the number of bytes read or <0 on error. */
442 inline ssize_t rrd_read(
443 rrd_file_t *rrd_file,
451 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
454 return -1; /* EINVAL */
455 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
456 if (_surplus > 0) { /* short read */
461 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
463 rrd_file->pos += _cnt; /* mimic read() semantics */
468 ret = read(rrd_file->fd, buf, count);
470 rrd_file->pos += ret; /* mimic read() semantics */
476 /* Write count bytes from buffer buf to the current position
477 * rrd_file->pos of rrd_file->fd.
478 * Returns the number of bytes written or <0 on error. */
480 inline ssize_t rrd_write(
481 rrd_file_t *rrd_file,
489 return -1; /* EINVAL */
490 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
491 rrd_file->pos += count;
492 return count; /* mimic write() semantics */
494 ssize_t _sz = write(rrd_file->fd, buf, count);
497 rrd_file->pos += _sz;
503 /* flush all data pending to be written to FD. */
505 inline void rrd_flush(
506 rrd_file_t *rrd_file)
508 if (fdatasync(rrd_file->fd) != 0) {
509 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
510 rrd_strerror(errno));
515 /* Initialize RRD header. */
520 rrd->stat_head = NULL;
523 rrd->live_head = NULL;
524 rrd->legacy_last_up = NULL;
526 rrd->pdp_prep = NULL;
527 rrd->cdp_prep = NULL;
528 rrd->rrd_value = NULL;
532 /* free RRD header data. */
538 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
539 free(rrd->live_head);
546 free(rrd->live_head);
547 free(rrd->stat_head);
553 free(rrd->rrd_value);
558 /* routine used by external libraries to free memory allocated by