1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
/* NOTE(review): two alternative definitions of __rrd_read follow; the
 * preprocessor conditional that selects between them is not visible in this
 * listing.  Both expect a variable named offset in the expanding scope; the
 * first also uses data, the second uses rrd_file and the label
 * out_nullify_head.  Neither is wrapped in do { ... } while (0), so each
 * expands to more than one statement. */
/* Variant 1: make dst point at (data + offset) without copying, then advance
 * offset by cnt elements of dst_t. */
24 #define __rrd_read(dst, dst_t, cnt) \
25 (dst) = (dst_t*)(void*) (data + offset); \
26 offset += sizeof(dst_t) * (cnt)
/* Variant 2: malloc room for cnt elements of dst_t; on allocation failure
 * record an error and jump to out_nullify_head; otherwise read() that many
 * bytes from rrd_file->fd into dst and add read()'s return value to offset.
 * NOTE(review): the return value of read() is added to offset unchecked, so
 * an error return (-1) or a short read would leave offset inconsistent with
 * the data actually read - confirm whether callers tolerate this. */
28 #define __rrd_read(dst, dst_t, cnt) \
29 if ((dst = malloc(sizeof(dst_t)*(cnt))) == NULL) { \
30 rrd_set_error(#dst " malloc"); \
31 goto out_nullify_head; \
33 offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
36 /* get the address of the start of this page */
/* PAGE_START clears the low bits of addr with the mask ~(_page_size-1); the
 * result is page aligned only when _page_size is a power of two.
 * _page_size is not a macro parameter: it must be a variable in scope at
 * every place the macro is expanded. */
37 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
39 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
43 /* Open a database file, return its header and an open filehandle,
44 * positioned to the first cdp in the first rra.
45 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
46 * before returning an error. Do not call rrd_close upon failure of rrd_open.
50 const char *const file_name,
55 mode_t mode = S_IRUSR;
59 ssize_t _page_size = sysconf(_SC_PAGESIZE);
60 int mm_prot = PROT_READ, mm_flags = 0;
65 rrd_file_t *rrd_file = NULL;
66 off_t newfile_size = 0;
68 if (rdwr & RRD_CREAT) {
69 /* yes bad inline signaling alert, we are using the
70 floatcookie to pass the size in ... only used in resize */
71 newfile_size = (off_t) rrd->stat_head->float_cookie;
/* Allocate the per-file bookkeeping structure and zero it. */
75 rrd_file = malloc(sizeof(rrd_file_t));
76 if (rrd_file == NULL) {
77 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
80 memset(rrd_file, 0, sizeof(rrd_file_t));
/* Validate the access bits in rdwr, then translate them into the mmap
 * protection and flags (mm_prot, mm_flags) and the open() flags. */
83 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
84 (RRD_READONLY | RRD_READWRITE)) {
85 /* Both READONLY and READWRITE were given, which is invalid. */
86 rrd_set_error("in read/write request mask");
/* Read-only request: private mapping. */
90 if (rdwr & RRD_READONLY) {
93 mm_flags = MAP_PRIVATE;
95 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
/* Read-write request: shared, writable mapping. */
99 if (rdwr & RRD_READWRITE) {
103 mm_flags = MAP_SHARED;
104 mm_prot |= PROT_WRITE;
/* Creation request: create the file if needed and truncate it. */
107 if (rdwr & RRD_CREAT) {
108 flags |= (O_CREAT | O_TRUNC);
/* Read-ahead request: ask mmap to pre-populate the mapping. */
111 if (rdwr & RRD_READAHEAD) {
113 mm_flags |= MAP_POPULATE; /* populate ptes and data */
115 #if defined MAP_NONBLOCK
116 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
119 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
123 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
124 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
128 /* Better try to avoid seeks as much as possible. stat may be heavy but
129 * many concurrent seeks are even worse. */
130 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
131 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
/* file_len is taken from fstat() when newfile_size is 0, otherwise from
 * newfile_size; in the latter case the file is extended by writing a single
 * byte at offset newfile_size - 1.  (The else separating the two assignments
 * is not visible in this listing.)
 * NOTE(review): the return values of lseek() and write() below are not
 * checked. */
134 if (newfile_size == 0) {
135 rrd_file->file_len = statb.st_size;
137 rrd_file->file_len = newfile_size;
138 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
139 write(rrd_file->fd, "\0", 1); /* poke */
140 lseek(rrd_file->fd, 0, SEEK_SET);
142 #ifdef HAVE_POSIX_FADVISE
143 /* In general we need no read-ahead when dealing with rrd_files.
144 When we stop reading, it is highly unlikely that we start up again.
145 In this manner we actually save time and diskaccess (and buffer cache).
146 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
147 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
/* NOTE(review): setvbuf() operates on FILE streams, while rrd_file->fd is
 * passed to open(), fstat() and lseek() elsewhere in this function.
 * Presumably this call sits in a build-specific branch whose preprocessor
 * condition is not visible in this listing - confirm the argument type. */
151 if (rdwr & RRD_READWRITE)
153 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
154 rrd_set_error("failed to disable the stream buffer\n");
/* Map the whole file (file_len bytes) starting at the current value of
 * offset. */
160 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
161 rrd_file->fd, offset);
163 /* lets see if the first read worked */
164 if (data == MAP_FAILED) {
165 rrd_set_error("mmaping file '%s': %s", file_name,
166 rrd_strerror(errno));
169 rrd_file->file_start = data;
/* NOTE(review): memset() converts its fill argument to unsigned char.  DNAN
 * is defined outside this listing; if it is a floating-point NaN, this call
 * does not fill the mapping with NaN doubles - confirm intent.  The length is
 * newfile_size - 1, one byte less than file_len. */
170 if (rdwr & RRD_CREAT) {
171 memset(data, DNAN, newfile_size - 1);
175 if (rdwr & RRD_CREAT)
/* NOTE(review): the madvise() calls below OR advice constants together.
 * madvise() documents advice as a single value rather than a bit mask, so
 * the combined value may not mean "both" on a given platform - confirm. */
178 if (rdwr & RRD_COPY) {
179 /* We will read everything in a moment (copying) */
180 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
182 /* We do not need to read anything in for the moment */
183 madvise(data, rrd_file->file_len, MADV_RANDOM);
184 /* the stat_head will be needed soonish, so hint accordingly */
185 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
/* Header parsing starts here.  Each __rrd_read() obtains one header section
 * and advances offset past it. */
189 __rrd_read(rrd->stat_head, stat_head_t,
192 /* lets do some test if we are on track ... */
193 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
194 rrd_set_error("'%s' is not an RRD file", file_name);
195 goto out_nullify_head;
/* The stored float_cookie must compare equal to FLOAT_COOKIE; a mismatch is
 * reported as a file created on another architecture. */
198 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
199 rrd_set_error("This RRD was created on another architecture");
200 goto out_nullify_head;
/* Reject files whose version number is newer than RRD_VERSION. */
203 version = atoi(rrd->stat_head->version);
205 if (version > atoi(RRD_VERSION)) {
206 rrd_set_error("can't handle RRD file version %s",
207 rrd->stat_head->version);
208 goto out_nullify_head;
210 #if defined USE_MADVISE
211 /* the ds_def will be needed soonish, so hint accordingly */
212 madvise(data + PAGE_START(offset),
213 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
215 __rrd_read(rrd->ds_def, ds_def_t,
216 rrd->stat_head->ds_cnt);
218 #if defined USE_MADVISE
219 /* the rra_def will be needed soonish, so hint accordingly */
220 madvise(data + PAGE_START(offset),
221 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
223 __rrd_read(rrd->rra_def, rra_def_t,
224 rrd->stat_head->rra_cnt);
226 /* handle different format for the live_head */
/* Two layouts are handled below.  In the first, only a time_t last-update
 * value is stored: a live_head_t is malloc'ed, last_up is copied from the
 * value read into legacy_last_up and last_up_usec is set to 0.  In the
 * second, a complete live_head_t is obtained with __rrd_read.  The condition
 * selecting between them is not visible in this listing. */
228 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
229 if (rrd->live_head == NULL) {
230 rrd_set_error("live_head_t malloc");
233 #if defined USE_MADVISE
234 /* the live_head will be needed soonish, so hint accordingly */
235 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
237 __rrd_read(rrd->legacy_last_up, time_t,
240 rrd->live_head->last_up = *rrd->legacy_last_up;
241 rrd->live_head->last_up_usec = 0;
243 #if defined USE_MADVISE
244 /* the live_head will be needed soonish, so hint accordingly */
245 madvise(data + PAGE_START(offset),
246 sizeof(live_head_t), MADV_WILLNEED);
248 __rrd_read(rrd->live_head, live_head_t,
/* Remaining header sections: one pdp_prep per data source, one cdp_prep per
 * (rra, data source) pair, one rra_ptr per rra. */
251 __rrd_read(rrd->pdp_prep, pdp_prep_t,
252 rrd->stat_head->ds_cnt);
253 __rrd_read(rrd->cdp_prep, cdp_prep_t,
254 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
255 __rrd_read(rrd->rra_ptr, rra_ptr_t,
256 rrd->stat_head->rra_cnt);
/* offset now equals the combined size of all header sections read above; it
 * becomes both the recorded header length and the initial position. */
258 rrd_file->header_len = offset;
259 rrd_file->pos = offset;
/* Presumably the target of the out_nullify_head jumps above (the label line
 * is not visible in this listing): clear stat_head on the error path. */
263 rrd->stat_head = NULL;
272 #if defined DEBUG && DEBUG > 1
273 /* Print list of in-core pages of the current rrd_file. */
/* Debug-only helper (compiled when DEBUG is defined and greater than 1).
 * Calls mincore() on the mapping [file_start, file_start + file_len) and
 * prints to stderr, prefixed with mark, each run of consecutive vec entries
 * that share the same residency state. */
276 rrd_file_t *rrd_file,
280 /* pretty print blocks in core */
283 ssize_t _page_size = sysconf(_SC_PAGESIZE);
/* off is first used as a size: file_len plus the number of pages needed to
 * cover file_len (rounded up).  Its use as a buffer size is presumably on a
 * line not visible in this listing. */
285 off = rrd_file->file_len +
286 ((rrd_file->file_len + _page_size - 1) / _page_size);
290 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
292 unsigned is_in = 0, was_in = 0;
/* NOTE(review): mincore() reports one vec entry per page, but this loop
 * indexes vec up to file_len entries; entries past the page count are not
 * written by mincore() - confirm this is intended. */
294 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
295 is_in = vec[off] & 1; /* if lsb set then is core resident */
/* A change of state ends the current run: report it.  The printed pointer is
 * an address inside vec (vec + prev), and the length is counted in vec
 * entries. */
298 if (was_in != is_in) {
299 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
300 was_in ? "" : "not ", vec + prev, off - prev);
/* Same format again, presumably to report the final run after the loop (the
 * call line is not visible in this listing). */
306 "%s: %sin core: %p len %ld\n", mark,
307 was_in ? "" : "not ", vec + prev, off - prev);
/* mincore() failed: report the errno text. */
309 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
312 fprintf(stderr, "sorry mincore only works with mmap");
315 #endif /* defined DEBUG && DEBUG > 1 */
318 /* drop cache except for the header and the active pages */
/* Walks the RRAs in order.  For each one, the range between the current
 * dontneed_start and that RRA's active block is hinted as not needed
 * (MADV_DONTNEED and/or POSIX_FADV_DONTNEED, depending on the build).  After
 * the loop, everything from dontneed_start to the end of the file is hinted
 * the same way.  The hints are best effort: the return values of madvise()
 * and posix_fadvise() are not checked. */
320 rrd_file_t *rrd_file,
323 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
324 unsigned long dontneed_start;
325 unsigned long rra_start;
326 unsigned long active_block;
328 ssize_t _page_size = sysconf(_SC_PAGESIZE);
330 #if defined DEBUG && DEBUG > 1
331 mincore_print(rrd_file, "before");
334 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
/* The first RRA starts right after the header.  Releasing starts one page
 * after the page that contains the end of the header. */
335 rra_start = rrd_file->header_len;
336 dontneed_start = PAGE_START(rra_start) + _page_size;
337 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
/* Tail of the active_block computation (the start of the statement is not
 * visible in this listing): the byte offset of this RRA's current row is
 * cur_row * ds_cnt * sizeof(rrd_value_t). */
340 + rrd->rra_ptr[i].cur_row
341 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
342 if (active_block > dontneed_start) {
344 madvise(rrd_file->file_start + dontneed_start,
345 active_block - dontneed_start - 1, MADV_DONTNEED);
347 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
348 #ifdef HAVE_POSIX_FADVISE
349 posix_fadvise(rrd_file->fd, dontneed_start,
350 active_block - dontneed_start - 1,
351 POSIX_FADV_DONTNEED);
/* Continue after the active block. */
354 dontneed_start = active_block;
355 /* do not release 'hot' block if update for this RRA will occur
356 * within 10 minutes */
/* With step = pdp_step * pdp_cnt, the tested quantity is
 * step - (last_up % step); it is compared against 10 * 60. */
357 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
358 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
359 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
360 dontneed_start += _page_size;
/* Part of a statement whose start and end are not visible in this listing;
 * presumably advances rra_start by the size of this RRA (row_cnt * ds_cnt
 * values) - confirm. */
363 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
/* Release whatever remains between dontneed_start and the end of the file. */
367 if (dontneed_start < rrd_file->file_len) {
369 madvise(rrd_file->file_start + dontneed_start,
370 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
372 #ifdef HAVE_POSIX_FADVISE
373 posix_fadvise(rrd_file->fd, dontneed_start,
374 rrd_file->file_len - dontneed_start,
375 POSIX_FADV_DONTNEED);
379 #if defined DEBUG && DEBUG > 1
380 mincore_print(rrd_file, "after");
382 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
390 rrd_file_t *rrd_file)
/* Tear-down sequence visible here: request asynchronous write-back of the
 * mapping (msync with MS_ASYNC), unmap it, then close the descriptor.  Each
 * step stores its result in ret and is followed by an rrd_set_error() call
 * with a matching message; the conditions guarding those calls are not
 * visible in this listing.
 * NOTE(review): ret is overwritten by every step, so unless the lines not
 * shown act on it in between, only the result of close() survives. */
395 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
397 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
398 ret = munmap(rrd_file->file_start, rrd_file->file_len);
400 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
402 ret = close(rrd_file->fd);
404 rrd_set_error("closing file: %s", rrd_strerror(errno));
411 /* Set position of rrd_file. */
/* Two mechanisms are visible below: updating the cached rrd_file->pos
 * according to whence, and calling lseek() on the descriptor.  Presumably
 * they belong to alternative build configurations whose preprocessor lines
 * are not visible in this listing - confirm.
 * Returns 0 when ret is non-negative and 1 otherwise. */
414 rrd_file_t *rrd_file,
/* SEEK_CUR moves relative to the current pos, SEEK_END relative to file_len.
 * The assignment for SEEK_SET is not visible here.  No range check of the
 * resulting pos is visible either. */
421 if (whence == SEEK_SET)
423 else if (whence == SEEK_CUR)
424 rrd_file->pos += off;
425 else if (whence == SEEK_END)
426 rrd_file->pos = rrd_file->file_len + off;
428 ret = lseek(rrd_file->fd, off, whence);
430 rrd_set_error("lseek: %s", rrd_strerror(errno));
433 /* mimic fseek, which returns 0 upon success */
434 return ret < 0; /*XXX: or just ret to mimic lseek */
438 /* Get current position in rrd_file. */
/* Returns the position cached in rrd_file->pos; no system call is made in
 * the visible code. */
441 rrd_file_t *rrd_file)
443 return rrd_file->pos;
447 /* Read count bytes into buffer buf, starting at rrd_file->pos.
448 * Returns the number of bytes read or <0 on error. */
451 rrd_file_t *rrd_file,
/* End-of-file test: pos beyond file_len, or a zero-length request. */
459 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
/* NOTE(review): the original lines between the test above and the return
 * below are not visible in this listing.  The return is annotated EINVAL,
 * not EOF, so it presumably belongs to a separate check - confirm. */
462 return -1; /* EINVAL */
/* _surplus is the number of requested bytes that lie beyond file_len; a
 * positive value means the request cannot be satisfied in full. */
463 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
464 if (_surplus > 0) { /* short read */
/* Copy _cnt bytes straight out of the mapping at file_start + pos, then
 * advance pos by the same amount. */
469 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
471 rrd_file->pos += _cnt; /* mimic read() semantics */
/* Descriptor-based variant: plain read() of count bytes.
 * NOTE(review): any guard on ret before pos is advanced is on a line not
 * visible in this listing; without one, an error return of -1 would move pos
 * backwards - confirm. */
476 ret = read(rrd_file->fd, buf, count);
478 rrd_file->pos += ret; /* mimic read() semantics */
484 /* Write count bytes from buffer buf to the current position
485 * rrd_file->pos of rrd_file->fd.
486 * Returns the number of bytes written or <0 on error. */
489 rrd_file_t *rrd_file,
/* The condition guarding this return is not visible in this listing. */
497 return -1; /* EINVAL */
/* Mapped variant: copy count bytes into the mapping at file_start + pos and
 * advance pos.
 * NOTE(review): no check that pos + count stays within file_len is visible
 * here; copying past the end of the mapping would be out of bounds - confirm
 * a guard exists on the lines not shown or in the callers. */
498 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
499 rrd_file->pos += count;
500 return count; /* mimic write() semantics */
/* Descriptor-based variant: plain write(); pos is advanced by the value
 * write() returned.  Any guard on _sz is on a line not visible here. */
502 ssize_t _sz = write(rrd_file->fd, buf, count);
505 rrd_file->pos += _sz;
511 /* flush all data pending to be written to FD. */
/* Calls fdatasync() on rrd_file->fd.  On failure an error message containing
 * the descriptor number and the errno text is recorded with rrd_set_error().
 * No msync() of the mapping is visible in this block. */
514 rrd_file_t *rrd_file)
516 if (fdatasync(rrd_file->fd) != 0) {
517 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
518 rrd_strerror(errno));
523 /* Initialize RRD header. */
/* Resets the pointer members shown below to NULL.  ds_def, rra_def and
 * rra_ptr are used elsewhere in this file but do not appear here; presumably
 * they are reset on lines missing from this listing - confirm. */
528 rrd->stat_head = NULL;
531 rrd->live_head = NULL;
532 rrd->legacy_last_up = NULL;
534 rrd->pdp_prep = NULL;
535 rrd->cdp_prep = NULL;
536 rrd->rrd_value = NULL;
540 /* free RRD header data. */
/* NOTE(review): live_head is freed in two places below - once under the
 * legacy_last_up test and once next to stat_head.  Presumably the two calls
 * sit in mutually exclusive build branches whose preprocessor lines are not
 * visible in this listing; if both could run, this would be a double free -
 * confirm. */
546 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
547 free(rrd->live_head);
554 free(rrd->live_head);
555 free(rrd->stat_head);
561 free(rrd->rrd_value);
566 /* routine used by external libraries to free memory allocated by