/*****************************************************************************
- * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
+ * RRDtool 1.2.99907080300 Copyright by Tobi Oetiker, 1997-2007
*****************************************************************************
* rrd_open.c Open an RRD File
*****************************************************************************
#define MEMBLK 8192
/* DEBUG 2 prints information obtained via mincore(2) */
-// #define DEBUG 2
+#define DEBUG 1
/* do not calculate exact madvise hints but assume 1 page for headers and
* set DONTNEED for the rest, which is assumed to be data */
-//#define ONE_PAGE 1
/* Avoid calling madvise on areas that were already hinted. May be benefical if
* your syscalls are very slow */
-#define CHECK_MADVISE_OVERLAPS 1
#ifdef HAVE_MMAP
/* the cast to void* is there to avoid this warning seen on ia64 with certain
offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
#endif
-/* next page-aligned (i.e. page-align up) */
-#ifndef PAGE_ALIGN
-#define PAGE_ALIGN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
-#endif
-/* previous page-aligned (i.e. page-align down) */
-#ifndef PAGE_ALIGN_DOWN
-#define PAGE_ALIGN_DOWN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
-#endif
-
-#ifdef HAVE_MMAP
-/* vector of last madvise hint */
-typedef struct _madvise_vec_t {
- void *start;
- ssize_t length;
-} _madvise_vec_t;
-_madvise_vec_t _madv_vec = { NULL, 0 };
+/* get the address of the start of this page */
+#ifndef PAGE_START
+#define PAGE_START(addr) ((addr)&(~(_page_size-1)))
#endif
-#if defined CHECK_MADVISE_OVERLAPS
-#define _madvise(_start, _off, _hint) \
- if ((_start) != _madv_vec.start && (ssize_t)(_off) != _madv_vec.length) { \
- _madv_vec.start = (_start) ; _madv_vec.length = (_off); \
- madvise((_start), (_off), (_hint)); \
- }
-#else
-#define _madvise(_start, _off, _hint) \
- madvise((_start), (_off), (_hint))
-#endif
/* Open a database file, return its header and an open filehandle,
* positioned to the first cdp in the first rra.
off_t newfile_size = 0;
if (rdwr & RRD_CREAT) {
+ /* Caution: in-band signaling. The float_cookie field is (ab)used to
+ pass in the new file size; this is only used by resize. */
newfile_size = (off_t) rrd->stat_head->float_cookie;
free(rrd->stat_head);
}
#if defined MAP_NONBLOCK
mm_flags |= MAP_NONBLOCK; /* just populate ptes */
#endif
-#ifdef USE_DIRECT_IO
- } else {
- flags |= O_DIRECT;
-#endif
}
-#ifdef O_NONBLOCK
- flags |= O_NONBLOCK;
-#endif
if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
#ifdef USE_MADVISE
if (rdwr & RRD_COPY) {
/* We will read everything in a moment (copying) */
- _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
+ madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
} else {
-# ifndef ONE_PAGE
/* We do not need to read anything in for the moment */
- _madvise(data, rrd_file->file_len, MADV_DONTNEED);
+ madvise(data, rrd_file->file_len, MADV_RANDOM);
/* the stat_head will be needed soonish, so hint accordingly */
- _madvise(data + PAGE_ALIGN_DOWN(offset),
- PAGE_ALIGN(sizeof(stat_head_t)),
- MADV_WILLNEED | MADV_RANDOM);
-
-# else
-/* alternatively: keep 1 page worth of data, likely headers,
- * don't need the rest. */
- _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
- _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
- ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
-# endif
+ madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
}
#endif
rrd->stat_head->version);
goto out_nullify_head;
}
-#if defined USE_MADVISE && !defined ONE_PAGE
+#if defined USE_MADVISE
/* the ds_def will be needed soonish, so hint accordingly */
- _madvise(data + PAGE_ALIGN_DOWN(offset),
- PAGE_ALIGN(sizeof(ds_def_t) * rrd->stat_head->ds_cnt),
- MADV_WILLNEED);
+ madvise(data + PAGE_START(offset),
+ sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
#endif
__rrd_read(rrd->ds_def, ds_def_t,
rrd->stat_head->ds_cnt);
-#if defined USE_MADVISE && !defined ONE_PAGE
+#if defined USE_MADVISE
/* the rra_def will be needed soonish, so hint accordingly */
- _madvise(data + PAGE_ALIGN_DOWN(offset),
- PAGE_ALIGN(sizeof(rra_def_t) * rrd->stat_head->rra_cnt),
- MADV_WILLNEED);
+ madvise(data + PAGE_START(offset),
+ sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
#endif
__rrd_read(rrd->rra_def, rra_def_t,
rrd->stat_head->rra_cnt);
#endif
rrd->live_head->last_up_usec = 0;
} else {
-#if defined USE_MADVISE && !defined ONE_PAGE
+#if defined USE_MADVISE
/* the live_head will be needed soonish, so hint accordingly */
- _madvise(data + PAGE_ALIGN_DOWN(offset),
- PAGE_ALIGN(sizeof(live_head_t)), MADV_WILLNEED);
+ madvise(data + PAGE_START(offset),
+ sizeof(live_head_t), MADV_WILLNEED);
#endif
__rrd_read(rrd->live_head, live_head_t,
1);
/* Close a reference to an rrd_file. */
-
-int rrd_close(
- rrd_file_t *rrd_file)
+static
+void mincore_print(
+ rrd_file_t *rrd_file,
+ char *mark)
{
- int ret;
-
-#if defined HAVE_MMAP || defined DEBUG
- ssize_t _page_size = sysconf(_SC_PAGESIZE);
-#endif
-#if defined DEBUG && DEBUG > 1
+#ifdef HAVE_MMAP
/* pretty print blocks in core */
off_t off;
unsigned char *vec;
+ ssize_t _page_size = sysconf(_SC_PAGESIZE);
off = rrd_file->file_len +
((rrd_file->file_len + _page_size - 1) / _page_size);
if (off == 0)
was_in = is_in;
if (was_in != is_in) {
- fprintf(stderr, "%sin core: %p len %ld\n",
+ fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
was_in ? "" : "not ", vec + prev, off - prev);
was_in = is_in;
prev = off;
}
}
fprintf(stderr,
- "%sin core: %p len %ld\n",
+ "%s: %sin core: %p len %ld\n", mark,
was_in ? "" : "not ", vec + prev, off - prev);
} else
fprintf(stderr, "mincore: %s", rrd_strerror(errno));
}
-#endif /* DEBUG */
+#else
+ fprintf(stderr, "sorry mincore only works with mmap");
+#endif
+}
+
+
+/* drop cache except for the header and the active pages */
+void rrd_dontneed(
+ rrd_file_t *rrd_file,
+ rrd_t *rrd)
+{
+ unsigned long dontneed_start;
+ unsigned long rra_start;
+ unsigned long active_block;
+ unsigned long i;
+ ssize_t _page_size = sysconf(_SC_PAGESIZE);
+
+#if defined DEBUG && DEBUG > 1
+ mincore_print(rrd_file, "before");
+#endif
-#ifdef USE_MADVISE
-# ifdef ONE_PAGE
- /* Keep headers around, round up to next page boundary. */
- ret =
- PAGE_ALIGN(rrd_file->header_len % _page_size + rrd_file->header_len);
- if (rrd_file->file_len > ret)
- _madvise(rrd_file->file_start + ret,
- rrd_file->file_len - ret, MADV_DONTNEED);
-# else
/* ignoring errors from RRDs that are smaller then the file_len+rounding */
- _madvise(rrd_file->file_start + PAGE_ALIGN_DOWN(rrd_file->header_len),
- rrd_file->file_len - PAGE_ALIGN(rrd_file->header_len),
- MADV_DONTNEED);
-# endif
+ rra_start = rrd_file->header_len;
+ dontneed_start = PAGE_START(rra_start) + _page_size;
+ for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
+ active_block =
+ PAGE_START(rra_start
+ + rrd->rra_ptr[i].cur_row
+ * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
+ if (active_block > dontneed_start) {
+#ifdef USE_MADVISE
+ madvise(rrd_file->file_start + dontneed_start,
+ active_block - dontneed_start - 1, MADV_DONTNEED);
+#endif
+/* in linux at least only fadvise DONTNEED seems to purge pages from cache */
+#ifdef HAVE_POSIX_FADVISE
+ posix_fadvise(rrd_file->fd, dontneed_start,
+ active_block - dontneed_start - 1,
+ POSIX_FADV_DONTNEED);
+#endif
+ }
+ dontneed_start = active_block;
+ /* do not release the 'hot' block if an update for this RRA will occur within 10 minutes */
+ if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
+ rrd->live_head->last_up % (rrd->stat_head->pdp_step *
+ rrd->rra_def[i].pdp_cnt) < 10 * 60) {
+ dontneed_start += _page_size;
+ }
+ rra_start +=
+ rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
+ sizeof(rrd_value_t);
+ }
+#ifdef USE_MADVISE
+ madvise(rrd_file->file_start + dontneed_start,
+ rrd_file->file_len - dontneed_start, MADV_DONTNEED);
+#endif
+#ifdef HAVE_POSIX_FADVISE
+ posix_fadvise(rrd_file->fd, dontneed_start,
+ rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
#endif
+#if defined DEBUG && DEBUG > 1
+ mincore_print(rrd_file, "after");
+#endif
+}
+
+int rrd_close(
+ rrd_file_t *rrd_file)
+{
+ int ret;
+
#ifdef HAVE_MMAP
+ ret = msync(rrd_file->file_start, rrd_file->file_len,MS_ASYNC);
+ if (ret != 0)
+ rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
ret = munmap(rrd_file->file_start, rrd_file->file_len);
if (ret != 0)
rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
{
#ifdef HAVE_MMAP
size_t _cnt = count;
- ssize_t _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
+ ssize_t _surplus;
+ if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
+ return 0;
+ if (buf == NULL)
+ return -1; /* EINVAL */
+ _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
if (_surplus > 0) { /* short read */
_cnt -= _surplus;
}
/* write count bytes from buffer buf to the current position
* rrd_file->pos of rrd_file->fd.
- * Returns the number of bytes written. */
+ * Returns the number of bytes written or <0 on error. */
inline ssize_t rrd_write(
rrd_file_t *rrd_file,
size_t count)
{
#ifdef HAVE_MMAP
+ if (count == 0)
+ return 0;
+ if (buf == NULL)
+ return -1; /* EINVAL */
memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
rrd_file->pos += count;
return count; /* mimmic write() semantics */